R/sec_2.R

Defines functions .get_cik_url_df munge_tbl build_address .build_address

Documented in build_address munge_tbl

# Build one `location{end_slug}` column for a single family of address columns.
#
# Arguments:
#   data           tibble being augmented.
#   end_slug       column-name suffix identifying the address family.
#   end_slugs      every suffix found by the caller (used to strip the others).
#   address_parts  candidate address column names.
#   return_message if TRUE, emits a progress message.
#
# Returns `data` with the location column joined on (plus a
# `cityState{end_slug}` column when exactly one city and one state column are
# found). `data` is returned unchanged when the location column already exists.
.build_address <-
  function(data, end_slug, end_slugs, address_parts, return_message = TRUE) {
    if (return_message) {
      glue("Building location for {end_slug}") %>% message()
    }

    parts <- address_parts[str_detect(address_parts, end_slug)]

    # Regex of every *other* suffix; stripped from the candidates so a longer
    # suffix that contains this one does not leak its columns into this family.
    remove_parts <-
      end_slugs[!end_slugs %in% end_slug] %>% str_c(collapse = "|")

    # Scalar condition, so use the short-circuit operator.
    if (!str_detect(end_slug, "Mailing|Alternate|Alt") && remove_parts != "") {
      parts <- str_remove_all(parts, remove_parts)
    }

    parts <- parts[str_detect(parts, end_slug)]
    new_col <- glue("location{end_slug}") %>% as.character()

    if (hasName(data, new_col)) {
      return(data)
    }

    city_state <- glue("cityState{end_slug}") %>% as.character()

    # First candidate matching `pattern`, or character(0) when nothing matches.
    first_match <- function(candidates, pattern) {
      hits <- candidates[str_detect(candidates, pattern)]
      if (length(hits) > 0) {
        hits <- hits[[1]]
      }
      hits
    }

    address <- first_match(parts, "addressStreet|address_street")
    address1 <- first_match(parts, "addressStreet1|address_street_1")
    address2 <- first_match(parts, "addressStreet2|address_street_2")
    city <- first_match(parts, "city|City")
    state <- first_match(parts, "state|State")
    # The zip+4 extension columns are excluded from the zip lookup.
    zip <- first_match(parts[!str_detect(parts, "zipcode4|zip4")], "zip")
    country <- first_match(parts, "country")

    df_locs <-
      data %>%
      select(one_of(address, address1, address2, city, state, zip, country)) %>%
      distinct()

    has_city_state <- length(city) + length(state) == 2

    if (has_city_state) {
      # `all_of()` makes it explicit that `city`/`state` hold column *names*;
      # bare names would silently pick a data column literally called
      # `city` or `state` instead.
      df_locs <-
        df_locs %>%
        unite(!!sym(city_state),
              all_of(c(city, state)),
              sep = ", ",
              remove = FALSE) %>%
        filter(!!sym(city_state) != "NA, NA")

      location_cols <- c(address, city_state, zip, country)
    } else {
      location_cols <- c(address, city, state, zip, country)
    }

    df_locs <-
      df_locs %>%
      # Blank out NAs so they do not appear as the text "NA" in the location.
      mutate_if(is.character,
                list(function(x) {
                  x %>% coalesce("")
                })) %>%
      unite(
        !!sym(new_col),
        all_of(location_cols),
        sep = " ",
        remove = FALSE
      ) %>%
      mutate_at(new_col, str_squish) %>%
      # Restore NA for anything that ended up empty.
      mutate_if(is.character,
                list(function(x) {
                  case_when(x == "" ~ NA_character_,
                            TRUE ~ x)
                }))

    # Join the distinct locations back on every column the two tables share.
    join_cols <- names(df_locs)[names(df_locs) %in% names(data)]

    data <-
      data %>%
      left_join(df_locs, by = join_cols)

    data
  }



#' Build Address from tibble
#'
#' Finds the address-like columns in \code{data}, groups them by the suffix
#' left after the address prefix is removed, and builds one location column
#' for every suffix shared by at least \code{part_threshold} columns.
#'
#' @param data \code{tibble}
#' @param return_message if \code{TRUE} returns a message
#' @param address_search_slugs vector of slugs identifying address features - defaults to `c("^address", "^streetAddress", "^city", "^state", "^codeState", "^codeCountry", "^country", "^zipcode")`
#' @param include_snake_versions `TRUE` includes snaked version of names
#' @param part_threshold minimum number of address columns that must share a
#'   suffix for that suffix to get a location column
#' @param snake_names if \code{TRUE} snakes names
#'
#' @return \code{data} with the location columns added; returned unchanged
#'   when no column matches \code{address_search_slugs}
#' @export
build_address <-
  function(data,
           address_search_slugs = c("^address", "^streetAddress", "^city", "^state", "^codeState", "^codeCountry", "^country", "^zipcode"),
           include_snake_versions = TRUE,
           part_threshold = 3,
           snake_names = FALSE,
           return_message = TRUE) {

    if (include_snake_versions) {
      # Add the snake_case form of each slug, re-anchored at the name start.
      clean_n <- address_search_slugs %>% make_clean_names()
      clean_n <- glue("^{clean_n}") %>% as.character()
      address_search_slugs <- c(address_search_slugs, clean_n) %>% unique()
    }

    address_slugs <-
      str_c(address_search_slugs, collapse = "|")

    address_parts <-
      data %>% select(matches(address_slugs)) %>% names()

    if (length(address_parts) == 0) {
      return(data)
    }

    # Suffix left once the address prefix is removed; keep only the suffixes
    # shared by enough columns.
    end_slugs <-
      tibble(part = address_parts %>%
               str_remove_all(address_slugs)) %>%
      count(part, sort = TRUE) %>%
      filter(n >= part_threshold) %>%
      pull(part)

    # Each pass adds one location column and feeds the result to the next,
    # so thread `data` through an explicit loop rather than `<<-`.
    for (end_slug in end_slugs) {
      data <-
        .build_address(
          data = data,
          end_slug = end_slug,
          end_slugs = end_slugs,
          address_parts = address_parts,
          return_message = return_message
        )
    }

    if (snake_names) {
      data <- data %>% clean_names()
    }

    data
  }


#' Munge a tibble
#'
#' Cleans a tibble in a fixed order: squishes whitespace in character columns
#' and turns empty strings into \code{NA}; converts character \code{is*} /
#' \code{has*} columns to logical; parses number-like character columns;
#' optionally upper-cases non-url character columns; optionally applies
#' percent / currency / comma formatting; optionally builds addresses; and
#' optionally snake-cases the column names.
#'
#' @param data a \code{tibble}
#' @param snake_names if \code{TRUE} returns snake case names
#' @param unformat if \code{TRUE} no formattable digits
#' @param convert_case if \code{TRUE} normalizes non url character columns to upper
#' @param amount_digits formattable digits
#' @param include_address if \code{TRUE} builds addresses
#'
#' @return the munged \code{tibble}
#' @export
munge_tbl <-
  function(data, snake_names = FALSE, unformat = FALSE, convert_case = TRUE,
           amount_digits = 2,
           include_address = TRUE) {

    data <- data %>%
      mutate_if(is.character,
                list(function(x) {
                  x %>% str_squish()
                })) %>%
      mutate_if(is.character,
                list(function(x) {
                  case_when(x == "" ~ NA_character_,
                            TRUE ~ x)
                }))

    # Character flag columns -> logical; anything outside the truthy set
    # (including NA) becomes FALSE.
    is_has <-
      data %>%
      select_if(is.character) %>%
      dplyr::select(dplyr::matches("^is|^has")) %>% names()

    if (length(is_has) > 0) {
      data <- data %>%
        mutate_at(is_has,
                  list(function(x) {
                    case_when(x %in% c("Y", "YES", "TRUE", "1") ~ TRUE,
                              TRUE ~ FALSE)
                  }))
    }

    # Number-like character columns; country/county are excluded because
    # they match the "count[A-Z]"-style patterns by accident.
    to_num <-
      data %>%
      select_if(is.character) %>%
      select(matches("amount|price|value|ratio|count[A-Z]|number|shares")) %>%
      select(-matches("country|county")) %>%
      names()

    if (length(to_num) > 0) {
      data <- data %>%
        mutate_at(to_num, readr::parse_number)
    }

    if (convert_case) {
      upper_cols <-
        data %>% select_if(is.character) %>%
        select(-matches("^url")) %>%
        names()
      data <-
        data %>%
        mutate_at(upper_cols,
                  str_to_upper)
    }

    if (!unformat) {
      pct_names <-
        data %>%
        select_if(is.numeric) %>%
        select(matches("^percent|^pct")) %>% names()
      count_names <-
        data %>%
        select_if(is.numeric) %>%
        select(matches("^count|^number")) %>% names()

      amt_names <-
        data %>%
        select_if(is.numeric) %>%
        select(matches("^amount|^amt|^price|^earnings")) %>% names()

      if (length(pct_names) > 0) {
        data <- data %>%
          mutate_at(pct_names,
                    list(function(x) {
                      x %>% percent(digits = 2)
                    }))
      }

      if (length(amt_names) > 0) {
        # Currency formatting belongs on the amount columns (this previously
        # targeted `pct_names`, leaving amounts unformatted).
        data <- data %>%
          mutate_at(amt_names,
                    list(function(x) {
                      x %>% currency(digits = amount_digits)
                    }))
      }

      if (length(count_names) > 0) {
        data <- data %>%
          mutate_at(count_names,
                    list(function(x) {
                      x %>% comma(digits = 0)
                    }))
      }
    }

    if (include_address) {
      data <-
        data %>%
        build_address()
    }

    if (snake_names) {
      data <-
        data %>%
        janitor::clean_names()
    }

    if (unformat) {
      data <- data %>%
        mutate_if(is.numeric, as.numeric)
    }

    data
  }

# filers ------------------------------------------------------------------
# Build the table of rankandfiled JSON endpoints for one filer.
#
# Returns a tibble with one row per endpoint: `nameTable` (display name) and
# `urlJSON` (endpoint URL for the given `cik`).
.get_cik_url_df <-
  function(cik = 1138621) {
    table_names <-
      c('General', 'Filings', 'Private', 'Fundraising', 'Traders',
        'C Level', 'MDA', 'Owners', 'Subsidiaries')

    endpoint_slugs <-
      c('general', 'filings', 'private', 'fundraising', 'traders',
        'clevel', 'mda', 'owners', 'subsidiaries')

    endpoint_urls <-
      paste0('http://rankandfiled.com/data/filer/', cik, '/', endpoint_slugs)

    dplyr::tibble(nameTable = table_names, urlJSON = endpoint_urls)
  }

# Parse the "general" JSON endpoint for a filer.
#
# Downloads `url`, resolves the column names, attaches the CIK taken from the
# URL and, when ownership details are present, expands them into extra
# columns (nested into a list column when `nest_data` is TRUE).
# When the payload has no entity name the work is delegated to the company,
# insider or fund parser, depending on which field the payload carries.
# Returns an empty tibble when the URL is not reachable.
.parse_json_general_filing <-
  function(url = "http://rankandfiled.com/data/filer/1468327/general",
           nest_data = TRUE,
           return_message = TRUE) {
    if (!url %>% httr::url_ok() %>% suppressWarnings()) {
      return(tibble())
    }

    data <-
      url %>%
      jsonlite::fromJSON(simplifyDataFrame = TRUE) %>%
      data.frame(stringsAsFactors = FALSE) %>%
      as_tibble()

    # Record which kind of entity the raw payload describes before the
    # columns are renamed.
    is_company <-
      'company' %in% names(data)

    is_insider <-
      'insider' %in% names(data)

    is_fund <-
      'fund' %in% names(data)

    data <-
      data %>%
      .resolve_name_df()

    cik <-
      url %>% str_replace_all('http://rankandfiled.com/data/filer/', '') %>%
      str_replace_all('\\/general', '') %>%
      as.numeric()

    data <-
      data %>%
      mutate(idCIK = cik)

    if (!'nameEntity' %in% names(data)) {
      # Defaults so the fallback mutate() below never hits an undefined
      # object when the payload is not a company, insider or fund.
      entity_name <- NA_character_
      ticker <- NA_character_

      if (is_company) {
        ticker <-
          data$company

        company_name_df <-
          ticker %>%
          .parse_company_general_safe() %>%
          suppressWarnings()

        has_rows <-
          company_name_df %>% nrow > 0
        if (has_rows) {
          return(company_name_df)
        }
      }
      if (is_insider) {
        insider_df <-
          .parse_json_general_insider(cik = cik, return_message = return_message) %>%
          mutate(nameEntity = nameEntity %>% str_to_upper())
        return(insider_df)
      }
      if (is_fund) {
        fund_df <-
          .parse_json_fund_general(cik = cik, return_message = return_message) %>%
          mutate(nameEntity = nameEntity %>% str_to_upper())
        return(fund_df)
      }
      data <-
        data %>%
        mutate(nameEntity = entity_name,
               idTicker = ticker) %>%
        select(-dplyr::matches("company"))
    }

    data <-
      data %>%
      select(-dplyr::matches("object")) %>%
      mutate_at(.vars = data %>% select(dplyr::matches("idCIK|idIRS")) %>% names(),
                as.numeric) %>%
      mutate(urlJSONGeneral = url,
             nameEntity = nameEntity %>% stringr::str_to_upper())

    # A one-line address is only built when all four components exist.
    has_address <-
      names(data) %in% c('addressStreet1Entity',
                         'stateEntity',
                         'cityEntity',
                         'zipcodeEntity') %>% sum() == 4
    if (has_address) {
      data <-
        data %>%
        mutate(
          addressEntity = paste0(
            addressStreet1Entity,
            ' ',
            cityEntity,
            ' ',
            stateEntity,
            ', ',
            zipcodeEntity
          )
        ) %>%
        select(idCIK, dplyr::matches("nameEntity"), addressEntity, everything())
    }

    if ('detailsOwnedBy' %in% names(data)) {
      data <-
        data %>%
        dplyr::rename(detailsOwns = detailsOwnedBy)
    }

    if ('detailsOwns' %in% names(data)) {
      # Each detail string is a '|'-separated list of
      # ticker:name_role#date records; spread them into numbered columns.
      detail_df <-
        seq_along(data$detailsOwns) %>%
        future_map_dfr(function(x) {
          detail_value <-
            data$detailsOwns[[x]]

          if (detail_value %>% is.na()) {
            df <-
              tibble(idRow = x, nameCompanyOwns = NA)

            # NOTE(review): this row is nested here and `detail_df` is nested
            # again after the map when `nest_data` is TRUE — confirm the
            # double nesting of NA rows is intended.
            if (nest_data) {
              df <-
                df %>%
                nest(-idRow, .key = dataCompaniesOwns)
            }

            return(df)
          }

          values <-
            detail_value %>% str_replace('\\|', '') %>%
            str_split('\\|') %>%
            flatten_chr()

          df_data <-
            tibble(value = values) %>%
            tidyr::separate(value,
                            into = c('idTickerOwns', 'other'),
                            sep = '\\:') %>%
            tidyr::separate(other,
                            into = c('nameCompanyOwns', 'other'),
                            sep = '\\_') %>%
            tidyr::separate(other,
                            into = c('roleOwner', 'dateOwner'),
                            sep = '\\#') %>%
            mutate(nameCompanyOwns = nameCompanyOwns %>% str_to_upper(),
                   idRow = x) %>%
            gather(item, value, -idRow, na.rm = TRUE) %>%
            group_by(item) %>%
            mutate(count = 1:n() - 1) %>%
            ungroup() %>%
            arrange((count)) %>%
            # First occurrence keeps the plain name; later ones get a
            # numeric suffix.
            mutate(item = ifelse(count == 0, item, paste0(item, count))) %>%
            select(-count)

          column_order <-
            c('idRow', df_data$item)

          df_data <-
            df_data %>%
            spread(item, value) %>%
            select(one_of(column_order))
        }) %>%
        suppressWarnings()

      detail_df <-
        detail_df %>%
        mutate_at(.vars = detail_df %>% select(dplyr::matches("date")) %>% names(),
                  funs(. %>% ymd())) %>%
        suppressWarnings()

      if (nest_data) {
        detail_df <-
          detail_df %>%
          nest(-idRow, .key = dataCompaniesOwns)
      }

      data <-
        data %>%
        mutate(idRow = 1:n()) %>%
        select(-detailsOwns) %>%
        left_join(detail_df) %>%
        select(-idRow) %>%
        suppressMessages()
    }

    data <-
      data %>%
      select(
        nameEntity,
        idCIK,
        dplyr::matches("typeCategory"),
        dplyr::matches("idtypeCompany"),
        everything()
      )

    if (return_message) {
      paste0("Parsed: ", url) %>% cat(fill = TRUE)
    }

    return(data)

  }

# Parse the "filings" JSON endpoint for a filer.
#
# Each filing arrives as one '*'-delimited string; it is split into its
# fields, the filing date is parsed, and SEC document / directory URLs are
# derived from the slugs. Returns an empty tibble when the URL is not
# reachable.
.parse_json_filings <-
  function(url = "http://rankandfiled.com/data/filer/1138621/filings",
           return_message = TRUE) {
    reachable <- url %>% httr::url_ok() %>% suppressWarnings()
    if (!reachable) {
      return(tibble())
    }

    cik <-
      as.numeric(
        str_replace_all(url, 'http://rankandfiled.com/data/filer/|/filings', '')
      )

    filing_fields <-
      c('dateFiling',
        'codeFiling',
        'typeForm',
        'baseIndex',
        'detailOffering',
        'slugSEC',
        'idSECSlug')

    raw_filings <-
      url %>%
      jsonlite::fromJSON(simplifyDataFrame = TRUE) %>%
      data.frame(stringsAsFactors = FALSE) %>%
      as_tibble()

    json_data <-
      raw_filings %>%
      tidyr::separate(filings, sep = '\\*', into = filing_fields) %>%
      mutate(
        dateFiling = lubridate::ymd(as.numeric(dateFiling)),
        idCIK = cik,
        urlJSONFilings = url,
        # A slug of "None" means there is no document to link to.
        urlSEC = ifelse(
          slugSEC == "None",
          NA,
          paste0("https://www.sec.gov/Archives/edgar/data/", idCIK, '/', slugSEC)
        ),
        pageSlug = str_replace_all(idSECSlug, '\\-', ''),
        # The directory URL is only built for ids that contain a dash.
        urlSECFilingDirectory = ifelse(
          str_detect(idSECSlug, '\\-'),
          paste0(
            "https://www.sec.gov/Archives/edgar/data/",
            idCIK,
            '/',
            pageSlug,
            '/',
            idSECSlug,
            '-index.htm'
          ),
          NA
        )
      ) %>%
      select(-dplyr::matches("^X")) %>%
      suppressMessages() %>%
      select(-c(slugSEC, pageSlug)) %>%
      select(idCIK, dateFiling, everything())

    if (return_message) {
      cat(paste0("Parsed: ", url), fill = TRUE)
    }

    json_data
  }

# Parse the "private" JSON endpoint for a filer.
#
# Downloads `url`, flattens the non-list columns of the offering history into
# one row per offering, optionally attaches related-people and broker details
# (nested into list columns when `nest_data` is TRUE), appends the fields
# taken from the status history and drops columns that are entirely NA.
# Returns an empty tibble when the URL is not reachable.
.parse_json_private <-
  function(url = "http://rankandfiled.com/data/filer/1438171/private",
           nest_data = TRUE,
           return_message = TRUE) {
    if (!url %>% httr::url_ok() %>% suppressWarnings()) {
      return(tibble())
    }

    json_data <-
      url %>%
      jsonlite::fromJSON()

    # NOTE(review): sets a session-wide option that is never restored —
    # presumably to keep large amounts out of scientific notation; confirm
    # before changing.
    options(scipen = 9999)

    status_df <-
      json_data$status_history %>% flatten_df() %>%
      mutate(date = date %>% lubridate::ymd())

    # Class of every offering-history column, used to drop the list columns.
    offering_history_class_df <-
      json_data$offering_history %>% future_map_dfr(class) %>%
      gather(column, type) %>%
      mutate(idName = 1:n())

    offering_data <-
      json_data$offering_history %>%
      select(offering_history_class_df %>%
               filter(!type == 'list') %>%
               .$idName)

    offering_data <-
      offering_data %>%
      as_tibble() %>%
      mutate_all(funs(. %>% str_replace('\\|', '')))

    offering_data <-
      offering_data %>%
      .resolve_name_df() %>%
      resolve_names_to_upper()

    if (offering_data %>% ncol >= 9) {
      offering_data <-
        offering_data %>%
        separate_column(column_name = 'idExemption') %>%
        separate_column(column_name = 'dateAmmended') %>%
        separate_column(column_name = 'amountFindersFee') %>%
        separate_column(column_name = 'countInvestors') %>%
        separate_column(column_name = 'countInvestorsNonAccredited') %>%
        separate_column(column_name = 'amountOffered') %>%
        separate_column(column_name = 'amountRemaining') %>%
        separate_column(column_name = 'amountSold')

      offering_data <-
        offering_data %>%
        mutate_at(.vars = offering_data %>% select(dplyr::matches("^is")) %>% names,
                  funs(. %>% as.logical())) %>%
        mutate_at(.vars = offering_data %>% select(dplyr::matches("^amount|^count|^idCIK")) %>% names,
                  funs(. %>% as.numeric())) %>%
        mutate_at(.vars = offering_data %>% select(dplyr::matches("^date")) %>% names,
                  funs(. %>% lubridate::ymd())) %>%
        mutate_at(.vars = offering_data %>% select(dplyr::matches("^amount")) %>% names,
                  funs(. %>% formattable::currency(digits = 0))) %>%
        mutate_at(.vars = offering_data %>% select(dplyr::matches("^count")) %>% names,
                  funs(. %>% formattable::comma(digits = 0))) %>%
        mutate_if(is.numeric, as.numeric)
    } else {
      offering_data <-
        offering_data %>%
        mutate_at(.vars = offering_data %>% select(dplyr::matches("^amount|^count|^idCIK")) %>% names,
                  funs(. %>% as.numeric())) %>%
        mutate_at(.vars = offering_data %>% select(dplyr::matches("^date")) %>% names,
                  funs(. %>% lubridate::ymd()))
    }

    has_relations <-
      '_related_people' %in% names(json_data$offering_history)

    if (has_relations) {
      # Stage 1: collapse each offering's related people into a single
      # "name-relation&name-relation" string (NA when there are none).
      relation_df <-
        seq_along(json_data$offering_history$amended) %>%
        future_map_dfr(function(x) {
          if (!json_data$offering_history$`_related_people`[[x]] %>% purrr::is_null()) {
            relation_data <-
              json_data$offering_history$`_related_people`[[x]] %>% mutate(
                name =
                  ifelse(
                    name %>% substr(1, 3) %>% str_detect('\\-'),
                    name %>% str_replace_all('\\-', '') %>% str_trim,
                    name %>% str_trim
                  )
              ) %>%
              tidyr::unite(nameRelation, name, relation, sep = '-') %>%
              .$nameRelation %>% paste0(collapse = '&')
          } else {
            relation_data <-
              NA
          }
          tibble(nameRelation = relation_data)
        }) %>%
        resolve_names_to_upper()

      # Stage 2: split the strings back out into numbered name/title columns.
      relation_df <-
        seq_len(nrow(relation_df)) %>%
        future_map_dfr(function(x) {
          person_title <-
            relation_df$nameRelation[[x]] %>%
            str_split('\\&') %>%
            flatten_chr() %>%
            str_to_upper() %>%
            str_trim()

          df <-
            tibble(idRow = x, person_title) %>%
            tidyr::separate(
              person_title,
              sep = '\\-',
              into = c('nameRelatedParty', 'titleRelatedParty')
            ) %>%
            mutate(countItem = 1:n() - 1) %>%
            gather(item, value, -c(idRow, countItem)) %>%
            arrange(countItem)

          df <-
            df %>%
            mutate(item = ifelse(countItem == 0, item, item %>% paste0(countItem))) %>%
            select(-countItem)
          column_order <-
            c('idRow', df$item)

          df <-
            df %>%
            spread(item, value) %>%
            select(one_of(column_order))

          if (nest_data) {
            df <-
              df %>%
              nest(-idRow, .key = dataRelations)
          }
          return(df)
        })

      # Without an `idRow` column there is nothing to join on and
      # left_join() would fail for lack of common variables.
      if ('idRow' %in% names(relation_df)) {
        offering_data <-
          offering_data %>%
          mutate(idRow = 1:n()) %>%
          left_join(relation_df) %>%
          suppressMessages() %>%
          select(-idRow)
      }
    }

    has_brokers <-
      '_brokers' %in% names(json_data$offering_history)

    if (has_brokers) {
      # Stage 1: collapse each offering's brokers into a single
      # "name&crd | name&crd" string (NA when there are none).
      broker_df <-
        seq_along(json_data$offering_history$amended) %>%
        map_dfr(function(x) {
          empty_value <-
            json_data$offering_history$`_brokers`[[x]] %>% length() == 0
          if (empty_value) {
            broker_crd <-
              NA
          } else {
            broker_crd <-
              json_data$offering_history$`_brokers`[[x]] %>%
              tidyr::unite(nameBrokerCRD, name, crd, sep = '&') %>%
              .$nameBrokerCRD %>%
              paste0(collapse = ' | ')
          }
          tibble(nameBrokerCRD = broker_crd)
        }) %>%
        resolve_names_to_upper()

      # Stage 2: split the strings back out into numbered name/CRD columns.
      broker_df <-
        seq_len(nrow(broker_df)) %>%
        future_map_dfr(function(x) {
          broker_crd <-
            broker_df$nameBrokerCRD[[x]] %>%
            str_split('\\|') %>%
            flatten_chr() %>%
            str_to_upper() %>%
            str_trim()

          # Offerings without brokers contribute no rows; the join below
          # then leaves their broker columns as NA.
          if (broker_crd %>% is.na() %>% sum() > 0) {
            return(tibble())
          }

          df <-
            tibble(idRow = x, broker_crd) %>%
            tidyr::separate(broker_crd,
                            sep = '\\&',
                            into = c('nameBroker', 'idCRDBroker')) %>%
            mutate(countItem = 1:n() - 1) %>%
            gather(item, value, -c(idRow, countItem)) %>%
            arrange(countItem) %>%
            mutate(item = ifelse(countItem == 0, item, item %>% paste0(countItem))) %>%
            select(-countItem)

          column_order <-
            c('idRow', df$item)

          df <-
            df %>%
            spread(item, value) %>%
            select(one_of(column_order))

          df <-
            df %>%
            mutate_at(df %>% select(dplyr::matches("idCRD")) %>% names(),
                      funs(. %>% as.numeric())) %>%
            resolve_names_to_upper()

          if (nest_data) {
            df <-
              df %>%
              nest(-idRow, .key = dataBrokers)
          }
          return(df)
        })

      # When no offering has a broker, `broker_df` has no columns at all and
      # joining it would error; skip the join in that case.
      if ('idRow' %in% names(broker_df)) {
        offering_data <-
          offering_data %>%
          mutate(idRow = 1:n()) %>%
          left_join(broker_df) %>%
          suppressMessages() %>%
          select(-idRow)
      }
    }

    # Fields from the status history, with fallbacks when a field is absent.
    if ('date' %in% names(status_df)) {
      initial_date <-
        status_df$date
    } else {
      initial_date <-
        NA
    }

    if ('entity_type' %in% names(status_df)) {
      typeEntity <-
        status_df$entity_type
    } else {
      typeEntity <-
        NA
    }

    if ('jurisdiction' %in% names(status_df)) {
      jurisdiction <-
        status_df$jurisdiction
    } else {
      jurisdiction <-
        NA
    }

    if ('over_five' %in% names(status_df)) {
      has_five <-
        status_df$over_five
    } else {
      has_five <-
        FALSE
    }

    offering_data <-
      offering_data %>%
      mutate(
        dateInitialFiling = initial_date,
        typeEntity = typeEntity,
        locationJurisdiction = jurisdiction,
        hasOver5FileFilings = has_five,
        urlJSONFilings = url
      ) %>%
      select(
        idCIK,
        dateInitialFiling,
        typeEntity,
        locationJurisdiction,
        hasOver5FileFilings,
        dplyr::matches("nameIndustry"),
        dplyr::matches("typeFund"),
        dplyr::matches("^is"),
        dplyr::matches("^amount"),
        dplyr::matches("^count"),
        everything()
      ) %>%
      resolve_names_to_upper() %>%
      # Drop columns that are NA in every row.
      select(which(colMeans(is.na(.)) < 1))

    if (return_message) {
      paste0("Parsed: ", url) %>% cat(fill = TRUE)
    }
    return(offering_data)
  }

# Parse the "fundraising" JSON endpoint for a filer.
#
# One row per person in the payload. The '*'-delimited related-company names
# and offering values of each person are spread into numbered columns (nested
# into list columns when `nest_data` is TRUE) and joined back on.
# Returns an empty tibble when the URL is not reachable.
.parse_json_fundraising <-
  function(url = "http://rankandfiled.com/data/filer/1138621/fundraising",
           nest_data = TRUE,
           return_message = TRUE) {
    reachable <- url %>% httr::url_ok() %>% suppressWarnings()
    if (!reachable) {
      return(tibble())
    }

    json_data <- jsonlite::fromJSON(url)

    result_names <-
      c('idCIKs',
        'nameCompanies',
        'isCIKFiler',
        'namePerson',
        'offeringsValues')

    fundraising_df <-
      json_data$results %>%
      as_tibble() %>%
      purrr::set_names(result_names) %>%
      mutate(
        idPerson = 1:n(),
        idCIK = as.numeric(
          str_replace_all(url, 'http://rankandfiled.com/data/filer/|/fundraising', '')
        ),
        namePerson = str_trim(stringr::str_to_upper(str_replace_all(namePerson, '\\-', ''))),
        urlJSONFundraising = url
      ) %>%
      suppressWarnings()

    # Related company names: one numbered column per company, per person.
    company_name_df <-
      seq_along(fundraising_df$nameCompanies) %>%
      future_map_dfr(function(x) {
        related_names <-
          fundraising_df$nameCompanies[[x]] %>%
          str_split('\\*') %>%
          flatten_chr() %>%
          str_to_upper()

        long_df <-
          tibble(value = related_names) %>%
          mutate(
            item = 'nameCompanyFundraisingRelated',
            countRow = 1:n() - 1,
            # The first entry keeps the plain name; later ones get a suffix.
            item = ifelse(countRow == 0, item, paste0(item, countRow)),
            idPerson = x
          ) %>%
          select(-countRow)

        wide_df <-
          long_df %>%
          spread(item, value) %>%
          select(one_of(c('idPerson', long_df$item))) %>%
          resolve_names_to_upper()

        if (nest_data) {
          wide_df <- nest(wide_df, -idPerson, .key = dataCompaniesRelated)
        }
        wide_df
      })

    # Offering values: cik|industry|amount triples, numbered per person.
    offering_value_df <-
      seq_along(fundraising_df$offeringsValues) %>%
      future_map_dfr(function(x) {
        offerings <-
          fundraising_df$offeringsValues[[x]] %>%
          str_split('\\*') %>%
          flatten_chr()

        long_df <-
          tibble(offering = offerings) %>%
          tidyr::separate(
            offering,
            into = c(
              'idCIKRelatedCompanyFundraising',
              'idIndustryRelatedCompanyFundRaising',
              'amountRaisedRelatedCompanyFundRaising'
            ),
            sep = '\\|'
          ) %>%
          mutate(countRow = 1:n()) %>%
          gather(item, value, -countRow) %>%
          mutate(
            countRow = countRow - 1,
            value = as.numeric(value),
            item = ifelse(countRow == 0, item, paste0(item, countRow)),
            idPerson = x
          ) %>%
          select(-countRow)

        wide_df <-
          long_df %>%
          spread(item, value) %>%
          select(one_of(c('idPerson', long_df$item))) %>%
          resolve_names_to_upper()

        if (nest_data) {
          wide_df <- nest(wide_df, -idPerson, .key = dataOfferingValues)
        }
        wide_df
      })

    fundraising_df <-
      fundraising_df %>%
      left_join(company_name_df) %>%
      left_join(offering_value_df) %>%
      select(-c(idCIKs, nameCompanies, idPerson, offeringsValues)) %>%
      select(idCIK, namePerson, isCIKFiler, everything()) %>%
      suppressMessages() %>%
      suppressWarnings()

    if (return_message) {
      cat(paste0("Parsed: ", url), fill = TRUE)
    }

    fundraising_df
  }

# Parse the "traders" JSON endpoint for a filer.
#
# Returns one row per trader with the filer CIK and the reported trader
# count attached, or an empty tibble when the URL is not reachable.
.parse_json_traders <-
  function(url = "http://rankandfiled.com/data/filer/1326801/traders",
           return_message = TRUE) {
    reachable <- url %>% httr::url_ok() %>% suppressWarnings()
    if (!reachable) {
      return(tibble())
    }

    json_data <- jsonlite::fromJSON(url)

    options(scipen = 9999)

    filer_cik <-
      as.numeric(
        str_replace_all(url, 'http://rankandfiled.com/data/filer/|/traders', '')
      )
    trader_count <- json_data$owners$count
    trader_cols <- c('nameEntityTrader', 'idCIKTrader', 'titleEntityTrader')

    df <-
      json_data$owners$owners %>%
      as_tibble() %>%
      purrr::set_names(trader_cols) %>%
      mutate(
        nameEntityTrader = str_to_upper(nameEntityTrader),
        idCIKTrader = as.numeric(idCIKTrader),
        idCIK = filer_cik
      ) %>%
      select(idCIK, everything()) %>%
      mutate(countTraders = trader_count) %>%
      resolve_names_to_upper()

    if (return_message) {
      cat(paste0("Parsed: ", url), fill = TRUE)
    }

    df
  }

# Parse the "clevel" JSON endpoint for a filer.
#
# Each entry is a '*'-delimited string that is split into its fields; dates
# are parsed and an entry with no end date is flagged as active.
# Returns an empty tibble when the URL is not reachable.
.parse_json_clevel <-
  function(url = "http://rankandfiled.com/data/filer/1326801/clevel",
           return_message = TRUE) {
    reachable <- url %>% httr::url_ok() %>% suppressWarnings()
    if (!reachable) {
      return(tibble())
    }

    json_data <- jsonlite::fromJSON(url)

    options(scipen = 9999)

    filer_cik <-
      as.numeric(
        str_replace_all(url, 'http://rankandfiled.com/data/filer/|/clevel', '')
      )

    csuite_fields <-
      c("idCIKCSuite",
        "nameEntityCSuite",
        "dateStartCSuite",
        "dateEndCSuite",
        "nameCSuiteRole",
        'codeCSuiteRole')

    clevel_df <-
      json_data$clevel %>%
      as_tibble() %>%
      tidyr::separate(value, into = csuite_fields, sep = '\\*') %>%
      mutate(
        idCIKCSuite = as.numeric(idCIKCSuite),
        nameEntityCSuite = str_to_upper(nameEntityCSuite),
        idCIK = filer_cik,
        dateStartCSuite = lubridate::ymd(dateStartCSuite),
        dateEndCSuite = lubridate::ymd(dateEndCSuite)
      ) %>%
      select(idCIK,
             idCIKCSuite,
             nameEntityCSuite,
             codeCSuiteRole,
             everything()) %>%
      # No end date means the role is still held.
      mutate(isActiveCSuite = is.na(dateEndCSuite)) %>%
      resolve_names_to_upper()

    if (return_message) {
      cat(paste0("Parsed: ", url), fill = TRUE)
    }

    clevel_df
  }

# Parse the "mda" JSON endpoint for a filer.
#
# The payload holds a word-by-date count matrix; it is reshaped to one row
# per (date, word) pair, newest date first.
# Returns an empty tibble when the URL is not reachable.
.parse_json_mda <-
  function(url = "http://rankandfiled.com/data/filer/1326801/mda",
           return_message = TRUE) {
    reachable <- url %>% httr::url_ok() %>% suppressWarnings()
    if (!reachable) {
      return(tibble())
    }

    json_data <- jsonlite::fromJSON(url)

    options(scipen = 9999)

    filer_cik <-
      as.numeric(
        str_replace_all(url, 'http://rankandfiled.com/data/filer/|/mda', '')
      )

    # Columns of the matrix are labelled with the parsed dates.
    count_matrix <- as_tibble(json_data$results$matrix)
    names(count_matrix) <- lubridate::ymd(json_data$results$dates)

    word_labels <- json_data$results$words

    data <-
      count_matrix %>%
      mutate(nameWord = word_labels) %>%
      gather(date10K, countWord, -nameWord) %>%
      mutate(date10K = lubridate::ymd(date10K),
             idCIK = filer_cik) %>%
      select(idCIK, date10K, nameWord, countWord) %>%
      arrange(desc(date10K)) %>%
      resolve_names_to_upper()

    if (return_message) {
      cat(paste0("Parsed: ", url), fill = TRUE)
    }

    data
  }

# Parse the "owners" JSON feed of a Rank and Filed filer.
#
# url: feed URL; the filer CIK is recovered by stripping the fixed
#   "http://rankandfiled.com/data/filer/" prefix and "/owners" suffix.
# nest_data: when TRUE, the per-owner detail and company tables are nested
#   into list columns (dataCompaniesOwned / dataInsiderCompaniesOwned).
# return_message: when TRUE, prints "Parsed: <url>" after parsing.
#
# Returns an empty tibble when httr::url_ok() does not report the URL as OK.
# Otherwise one row per entry of `insiders$cik`, extended with the `filer`
# attributes (suffixed "Owner") and the `companies` data when the feed
# carries those elements.
.parse_json_owners <-
  function(url = "http://rankandfiled.com/data/filer/1326801/owners",
           nest_data = TRUE,
           return_message = TRUE) {
    if (!url %>% httr::url_ok() %>% suppressWarnings()) {
      return(tibble())
    }

    json_data <-
      url %>%
      jsonlite::fromJSON()

    cik <-
      url %>% str_replace_all('http://rankandfiled.com/data/filer/|/owners', '') %>%
      as.numeric()

    general_df <-
      tibble(idCIK = cik,
             idCIKOwned = json_data$insiders$cik %>% as.numeric())

    has_filer <-
      'filer' %in% names(json_data$insiders)

    if (has_filer) {
      filing_df <-
        json_data$insiders$filer %>%
        as_tibble()

      filing_df <-
        filing_df %>%
        .resolve_name_df()

      filing_df <-
        filing_df %>%
        mutate_at(.vars = filing_df %>% select(dplyr::matches("nameEntity")) %>% names(),
                  funs(. %>% str_to_upper())) %>%
        resolve_names_to_upper()

      if ('name' %in% names(filing_df)) {
        filing_df <-
          filing_df %>%
          select(-name)
      }

      if ('sic' %in% names(filing_df)) {
        filing_df <-
          filing_df %>%
          dplyr::rename(idSIC = sic) %>%
          mutate(idSIC = idSIC %>% as.numeric())
      }

      # Every filer column is re-labelled as an "...Owner" column.
      names(filing_df) <-
        names(filing_df) %>% str_replace('OwnedBy', '') %>%
        paste0('Owner')

      if ('detailsOwner' %in% names(filing_df)) {
        # Each detail string is a "|"-separated list of
        # "ticker:company_role#date" records, one set per owner row.
        detail_df <-
          seq_along(filing_df$detailsOwner) %>%
          future_map_dfr(function(x) {
            detail_value <-
              filing_df$detailsOwner[[x]]

            if (detail_value %>% is.na()) {
              df <-
                tibble(idRow = x, nameCompanyOwned = NA)

              if (nest_data) {
                df <-
                  df %>%
                  nest(-idRow, .key = dataCompaniesOwned)
              }
              return(df)
            }

            values <-
              detail_value %>% str_replace('\\|', '') %>%
              str_split('\\|') %>%
              flatten_chr()

            # Long format, then number repeated items (item, item1, ...)
            # so that spreading yields a single wide row per owner.
            df_data <-
              tibble(value = values) %>%
              tidyr::separate(value,
                              into = c('idTickerOwned', 'other'),
                              sep = '\\:') %>%
              tidyr::separate(other,
                              into = c('nameCompanyOwned', 'other'),
                              sep = '\\_') %>%
              tidyr::separate(other,
                              into = c('roleOwned', 'dateOwned'),
                              sep = '\\#') %>%
              mutate(nameCompanyOwned = nameCompanyOwned %>% str_to_upper(),
                     idRow = x) %>%
              gather(item, value, -idRow, na.rm = TRUE) %>%
              group_by(item) %>%
              mutate(count = 1:n() - 1) %>%
              ungroup() %>%
              arrange((count)) %>%
              mutate(item = ifelse(count == 0, item, paste0(item, count))) %>%
              select(-count)

            column_order <-
              c('idRow', df_data$item)

            df_data <-
              df_data %>%
              spread(item, value) %>%
              select(one_of(column_order)) %>%
              resolve_names_to_upper()

            if (nest_data) {
              df_data <-
                df_data %>%
                nest(-idRow, .key = dataCompaniesOwned)
            }

            return(df_data)
          }) %>%
          suppressWarnings()

        detail_df <-
          detail_df %>%
          mutate_at(.vars = detail_df %>% select(dplyr::matches("date")) %>% names(),
                    funs(. %>% ymd())) %>%
          suppressWarnings()

        filing_df <-
          filing_df %>%
          mutate(idRow = 1:n()) %>%
          select(-detailsOwner) %>%
          left_join(detail_df) %>%
          select(-idRow) %>%
          suppressMessages()
      }

      general_df <-
        general_df %>%
        bind_cols(filing_df)
    }

    has_companies <-
      'companies' %in% names(json_data$insiders)

    if (has_companies) {
      company_df <-
        seq_len(nrow(general_df)) %>%
        future_map_dfr(function(x) {
          company_rows <-
            json_data$insiders$companies[[x]]

          # NROW() also covers an empty list, for which nrow() is NULL.
          has_no_data <-
            NROW(company_rows) == 0

          if (has_no_data) {
            df <-
              tibble(idRow = x, nameFiler = NA)
            if (nest_data) {
              df <-
                df %>%
                nest(-idRow, .key = dataInsiderCompaniesOwned)
            }
            # Return the placeholder instead of falling through and
            # trying to parse an empty company table.
            return(df)
          }

          company_df <-
            company_rows %>%
            as_tibble() %>%
            .resolve_name_df() %>%
            mutate(idRow = x) %>%
            mutate(nameFiler = nameFiler %>% str_to_upper())

          if ('sic' %in% names(company_df)) {
            company_df <-
              company_df %>%
              dplyr::rename(idSICCompanyOwned = sic) %>%
              mutate(idSICCompanyOwned = idSICCompanyOwned %>% as.numeric())
          }

          df_data <-
            company_df %>%
            gather(item, value, -c(nameFiler, idRow)) %>%
            group_by(item) %>%
            mutate(count = 1:n() - 1) %>%
            ungroup() %>%
            arrange((count)) %>%
            mutate(item = ifelse(count == 0, item, paste0(item, count))) %>%
            select(-count)

          column_order <-
            c('idRow', 'nameFiler', df_data$item)

          df_data <-
            df_data %>%
            spread(item, value) %>%
            select(one_of(column_order)) %>%
            resolve_names_to_upper()

          if (nest_data) {
            df_data <-
              df_data %>%
              nest(-idRow, .key = dataInsiderCompaniesOwned)
          }
          return(df_data)
        })

      company_df <-
        company_df %>%
        mutate_at(.vars =
                    company_df %>% select(dplyr::matches("date")) %>% names(),
                  funs(. %>% lubridate::ymd())) %>%
        mutate_at(.vars =
                    company_df %>% select(dplyr::matches("idCIK")) %>% names(),
                  .funs = as.numeric) %>%
        mutate_at(
          .vars =
            company_df %>% select(dplyr::matches("nameCompany")) %>% names(),
          .funs = stringr::str_to_upper
        )

      general_df <-
        general_df %>%
        mutate(idRow = 1:n()) %>%
        left_join(company_df %>% select(-dplyr::matches("idCIKOwned"))) %>%
        select(-idRow) %>%
        suppressMessages()
    }

    if (return_message) {
      list("Parsed: ", url) %>%
        purrr::invoke(paste0, .) %>% cat(fill = TRUE)
    }

    general_df <-
      general_df %>%
      select(idCIK,
             idCIKOwned,
             nameEntityOwner,
             dplyr::matches("nameFiler"),
             everything()) %>%
      resolve_names_to_upper()

    return(general_df)
  }

# Parse one page of the "all filings" JSON feed of a Rank and Filed filer.
#
# url: page URL; the filer CIK is what remains of it once the fixed
#   prefix, the "/all" segment and the "?..." query part are removed.
# return_message: when TRUE, prints "Parsed: <url>" after parsing.
#
# Returns an empty tibble when httr::url_ok() does not report the URL as OK.
# Otherwise each entry of the feed's `filings` element is split on "*",
# SEC URLs are derived from the slugs, form names are joined in from
# dictionary_sec_form_codes() and the result is passed through
# resolve_names_to_upper().
# Side effect: sets options(scipen = 9999) and does not restore it.
.parse_json_public_filers <-
  function(url = "http://rankandfiled.com/data/filer/1680780/all?start=0",
           return_message = TRUE) {
    page_is_live <- suppressWarnings(httr::url_ok(url))
    if (!page_is_live) {
      return(tibble())
    }

    payload <- jsonlite::fromJSON(url)

    options(scipen = 9999)

    url_tail <-
      str_replace_all(url, 'http://rankandfiled.com/data/filer/|/all', '')
    filer_cik <-
      as.numeric(flatten_chr(str_split(url_tail, '\\?'))[[1]])

    # Field order of each "*"-delimited record; "X9" is a throw-away slot.
    record_fields <-
      c(
        "idRF",
        "idForm",
        "detailForm",
        "typeReport",
        "typeFiling",
        "slugSEC",
        "idSECSlug",
        "dateFiling",
        "X9"
      )

    filings <- as_tibble(payload$filings)

    filings <-
      suppressWarnings(suppressMessages({
        split_records <-
          separate(filings,
                   value,
                   into = record_fields,
                   sep = '\\*')
        select(split_records, -dplyr::matches("X"))
      }))

    filings <-
      suppressWarnings({
        with_urls <-
          mutate(
            filings,
            idCIK = filer_cik,
            # Accession slug without dashes names the filing directory.
            pageSlug = str_replace_all(idSECSlug, '\\-', ''),
            urlSECFilingDirectory = ifelse(
              str_detect(idSECSlug, '\\-'),
              purrr::reduce(
                list(
                  "https://www.sec.gov/Archives/edgar/data/",
                  idCIK,
                  '/',
                  pageSlug,
                  '/',
                  idSECSlug,
                  '-index.htm'
                ),
                paste0
              ),
              NA
            ),
            urlSEC = ifelse(
              slugSEC == "None",
              NA,
              purrr::invoke(
                paste0,
                list(
                  "https://www.sec.gov/Archives/edgar/data/",
                  idCIK,
                  '/',
                  slugSEC
                )
              )
            )
          )
        select(with_urls, -pageSlug)
      })

    filings <-
      suppressWarnings(suppressMessages({
        typed <-
          mutate(
            filings,
            typeFiling = str_to_upper(typeFiling),
            dateFiling = lubridate::ymd(as.numeric(dateFiling)),
            detailForm = ifelse(detailForm == '', NA, detailForm),
            typeReport = ifelse(typeReport == '', NA, typeReport),
            is13FFiling = (str_detect(urlSEC, "xslForm13F")) &
              (typeFiling == "HOLDINGS")
          )
        # Blank dates / details inherit the value of the row above.
        typed <- tidyr::fill(typed, dateFiling)
        typed <- tidyr::fill(typed, detailForm)
        typed <- select(typed, -slugSEC)
        typed <- left_join(typed, dictionary_sec_form_codes())
        typed <- tidyr::fill(typed, nameForm)
        select(typed, idCIK, idRF, idForm, nameForm, everything())
      }))

    filings <- resolve_names_to_upper(filings)

    if (return_message) {
      cat(purrr::invoke(paste0, list("Parsed: ", url)), fill = TRUE)
    }
    return(filings)
  }

# Parse the "subsidiaries" JSON feed of a Rank and Filed filer.
#
# url: feed URL.
# return_message: when TRUE, prints "Parsed: <url>" after a full parse.
#
# Returns an empty tibble when httr::url_ok() does not report the URL as OK.
# When the feed contains columns outside the known set, the known columns
# are renamed, id/count/price-like columns are parsed as numbers and the
# table is returned early (no date parsing, filtering or message).
# Otherwise all columns are renamed and typed, rows without a
# locationOrganizationSubsidiary are dropped and the result is passed
# through resolve_names_to_upper().
# Side effect: sets options(scipen = 9999) and does not restore it.
.parse_json_subsidiaries <-
  function(url = "http://rankandfiled.com/data/filer/34088/subsidiaries",
           return_message = TRUE) {
    url_is_live <- suppressWarnings(httr::url_ok(url))
    if (!url_is_live) {
      return(tibble())
    }
    options(scipen = 9999)

    # Feed column names and, position by position, their package names.
    feed_fields <-
      c("cik",
        "country",
        "first_filed",
        "last_filed",
        "name",
        "percent")
    package_fields <-
      c(
        'idCIK',
        'locationOrganizationSubsidiary',
        'dateFirstFiled',
        'dateLastFiled',
        'nameSubsidiary',
        'pctSubsidiaryOwned'
      )
    translate_fields <- function(feed_names) {
      package_fields[match(feed_names, feed_fields)]
    }

    subsidiaries <- as_tibble(jsonlite::fromJSON(url)$subsidiaries)

    rf_names <- names(subsidiaries)
    is_known <- rf_names %in% feed_fields

    if (length(rf_names[!is_known]) > 0) {
      known_part <- select(subsidiaries, one_of(rf_names[is_known]))
      known_part <-
        purrr::set_names(known_part, translate_fields(names(known_part)))

      subsidiaries <-
        bind_cols(known_part,
                  select(subsidiaries, one_of(rf_names[!is_known])))

      subsidiaries <-
        suppressWarnings(
          mutate_at(
            subsidiaries,
            .vars = names(select(
              subsidiaries,
              dplyr::matches(
                "idCIK|idMidas|idIRS|^count|^price|^amount|^ratio|^pct|idMDA|^dateiso|idRF|price|amount|^year"
              )
            )),
            funs(. %>% as.character() %>% readr::parse_number())
          )
        )
      return(subsidiaries)
    }

    subsidiaries <-
      purrr::set_names(subsidiaries, translate_fields(names(subsidiaries)))

    subsidiaries <-
      mutate(
        subsidiaries,
        idCIK = as.numeric(idCIK),
        nameSubsidiary = str_to_upper(nameSubsidiary),
        locationOrganizationSubsidiary = str_to_upper(locationOrganizationSubsidiary)
      )

    if ('pctSubsidiaryOwned' %in% names(subsidiaries)) {
      # The feed value is divided by 100 to give a fraction.
      subsidiaries <-
        mutate(
          subsidiaries,
          pctSubsidiaryOwned = as.numeric(pctSubsidiaryOwned),
          pctSubsidiaryOwned = pctSubsidiaryOwned / 100
        )
    }

    date_columns <- names(select(subsidiaries, dplyr::matches("date")))
    subsidiaries <-
      mutate_at(subsidiaries,
                .vars = date_columns,
                funs(. %>% lubridate::ymd()))

    subsidiaries <-
      filter(subsidiaries, !is.na(locationOrganizationSubsidiary))
    subsidiaries <- resolve_names_to_upper(subsidiaries)

    if (return_message) {
      cat(purrr::invoke(paste0, list("Parsed: ", url)), fill = TRUE)
    }
    return(subsidiaries)
  }

# Download and combine every filing page Rank and Filed holds for a CIK.
#
# cik: Central Index Key of the filer.
# return_message: when TRUE, each page parse and the final row count are
#   reported on the console.
#
# The filer's "general" feed decides which general parser supplies the
# entity name and filing count (public company, insider, or private
# filer). The filing pages are then fetched with
# .parse_json_public_filers(), de-duplicated, and any "|"-separated
# typeReport codes are expanded into idFormType / nameFormType columns via
# dictionary_sec_filing_codes().
.parse_cik_filings <-
  function(cik = 1527559,
           return_message = TRUE) {
    general_url <-
      paste0('http://rankandfiled.com/data/filer/', cik, '/general')

    # Fetched once; the same payload drives the type checks and the
    # ticker lookup below.
    data_js <-
      general_url %>% jsonlite::fromJSON() %>% data.frame(stringsAsFactors = FALSE)

    is_public_company <-
      'company' %in% (data_js %>% names())

    is_insider <-
      'insider' %in% (data_js %>% names())

    if (is_public_company) {
      general_df <-
        .parse_company_general_safe(ticker = data_js[['company']])
    }

    if (is_insider) {
      general_df <-
        .parse_json_general_insider(cik = cik)
    }

    is_private_filer <-
      (!is_public_company) & (!is_insider)
    if (is_private_filer) {
      general_df <-
        general_url %>%
        .parse_json_general_filing()
    }

    # NOTE(review): assumes the endpoint serves 50 filings per page, as the
    # 50-step `start` offsets imply. Rounding up keeps the last, partially
    # filled page (e.g. 120 filings -> offsets 0, 50 and 100).
    filing_pages <-
      ceiling(general_df$countFilings / 50)

    # Always request at least the first page.
    filing_urls <-
      paste0(
        'http://rankandfiled.com/data/filer/',
        cik,
        '/all?start=',
        seq(0, by = 50, length.out = max(filing_pages, 1))
      )

    .parse_json_public_filers_safe <-
      purrr::possibly(.parse_json_public_filers, NULL)

    .all_filings <-
      filing_urls %>%
      future_map_dfr(function(x) {
        .parse_json_public_filers_safe(url = x, return_message = return_message)
      }) %>%
      distinct() %>%
      suppressWarnings()

    entity <-
      general_df$nameEntity %>%
      str_to_upper()

    .all_filings <-
      .all_filings %>%
      mutate(nameEntity = entity) %>%
      select(idCIK, nameEntity, dateFiling,
             dplyr::matches("idRF"),
             everything())

    if ('typeReport' %in% names(.all_filings)) {
      report_dict_df <-
        dictionary_sec_filing_codes()

      .all_filings <-
        .all_filings %>%
        mutate(idRow = seq_len(n()))

      report_df <-
        .all_filings %>%
        select(typeReport, idRow) %>%
        filter(!typeReport %>% is.na())

      .all_filings <-
        .all_filings %>%
        dplyr::rename(typesReport = typeReport)

      # Skip the expansion when no filing carries a report type; mapping
      # over 1:0 would index past the end of an empty table.
      if (nrow(report_df) > 0) {
        report_items_df <-
          seq_len(nrow(report_df)) %>%
          future_map_dfr(function(x) {
            is_none <-
              report_df$typeReport[[x]] == 'None'

            if (is_none) {
              return(tibble(
                idRow = report_df$idRow[[x]],
                idFormType = 'None',
                nameFormType = NA
              ))
            }

            row_df <-
              report_df %>%
              slice(x)
            reports <-
              row_df$typeReport %>%
              str_split('\\|') %>%
              flatten_chr()

            # One wide row per filing: repeated items are numbered
            # (item, item1, item2, ...) before spreading.
            item_df <-
              tibble(idFormType = reports, idRow = row_df$idRow) %>%
              left_join(report_dict_df) %>%
              gather(item, value, -idRow) %>%
              group_by(item) %>%
              mutate(countItems = 1:n() - 1) %>%
              ungroup() %>%
              mutate(item = ifelse(countItems == 0, item, paste0(item, countItems))) %>%
              arrange(countItems) %>%
              select(-countItems) %>%
              suppressMessages()

            col_order <-
              c('idRow', item_df$item)

            item_df %>%
              spread(item, value) %>%
              select(one_of(col_order))
          })

        .all_filings <-
          .all_filings %>%
          left_join(report_items_df) %>%
          suppressMessages()
      }

      .all_filings <-
        .all_filings %>%
        select(-idRow)
    }

    if (return_message) {
      paste0(
        "Parsed ",
        .all_filings %>% nrow() %>% formattable::comma(digits = 0),
        ' SEC Filings for ',
        entity
      ) %>%
        cat(fill = TRUE)
    }

    .all_filings <-
      .all_filings %>%
      resolve_names_to_upper()
    return(.all_filings)
  }

# Collect every requested Rank and Filed table for a single CIK.
#
# cik: Central Index Key of the filer.
# nest_data: forwarded to the parsers that accept it.
# tables: character vector of table names (case-insensitive) taken from
#   `table_options` below; NULL or empty selects all of them. "General" is
#   always parsed because the other tables take the entity name from it.
# return_message: forwarded to the parsers; when TRUE a summary line is
#   also printed at the end.
#
# Returns a tibble with one row per table that produced more than one
# column: idCIK, nameEntity, nameTable and the list column dataTable.
# Stops when none of the requested names is a known table.
.parse_cik_data <-
  function(cik = 899689,
           nest_data = TRUE,
           tables = NULL,
           return_message = TRUE) {
    url_df <-
      cik %>%
      .get_cik_url_df()

    table_options <-
      c(
        'General',
        'CIK Filings',
        'Filings',
        'Private Offerings',
        'Related Parties',
        'Traders',
        'C Level',
        'MDA',
        'Owners',
        'Insider Trades',
        'Trades',
        'Subsidiaries'
      )

    if (length(tables) == 0) {
      tables <-
        table_options
    }

    tables <-
      tables %>% str_to_upper()

    missing_tables <-
      !any(tables %in% (table_options %>% str_to_upper()))
    if (missing_tables) {
      stop(paste0(
        "Sorry Tables Can Only Be:",
        '\n',
        paste0(table_options, collapse = '\n')
      ))
    }

    if (!'GENERAL' %in% tables) {
      tables <-
        tables %>%
        append('GENERAL')
    }

    # Exact, case-insensitive membership test. A substring match would let
    # "Filings" switch on "CIK Filings" and "Trades" switch on
    # "Insider Trades".
    is_requested <- function(table_name) {
      str_to_upper(table_name) %in% tables
    }

    # Stand-in for a table that was not requested or produced nothing; its
    # single column gets it filtered out of the result below.
    empty_table <- function() {
      tibble(idCIK = cik)
    }

    # Prefix a non-empty table with the entity name from the general table.
    add_entity_name <- function(df) {
      if (nrow(df) == 0) {
        return(df)
      }
      df %>%
        left_join(general_df %>% select(idCIK, nameEntity)) %>%
        select(nameEntity, idCIK, everything()) %>%
        suppressMessages()
    }

    # General is always present in `tables` (appended above), so it is
    # parsed unconditionally; a failed or empty parse falls back to a
    # name-less placeholder instead of aborting the whole CIK.
    .parse_json_general_filing_safe <-
      purrr::possibly(.parse_json_general_filing, tibble())

    general_df <-
      url_df$urlJSON[[1]] %>%
      .parse_json_general_filing_safe(nest_data = nest_data,
                                      return_message = return_message) %>%
      as_tibble()

    if (general_df %>% nrow() == 0) {
      general_df <-
        tibble(idCIK = cik,
               nameEntity = NA)
    } else {
      general_df <-
        general_df %>%
        mutate(nameEntity = nameEntity %>% str_to_upper())
    }

    filing_df <- empty_table()
    if (is_requested('Filings')) {
      .parse_json_filings_safe <-
        purrr::possibly(.parse_json_filings, tibble())

      filing_df <-
        url_df$urlJSON[[2]] %>%
        .parse_json_filings_safe(return_message = return_message) %>%
        mutate_if(is_character,
                  str_to_upper) %>%
        add_entity_name()
    }

    private_df <- empty_table()
    if (is_requested('Private Offerings')) {
      .parse_json_private_safe <-
        purrr::possibly(.parse_json_private, tibble())

      private_df <-
        url_df$urlJSON[[3]] %>%
        .parse_json_private_safe(nest_data = nest_data,
                                 return_message = return_message) %>%
        add_entity_name()
    }

    fundraising_df <- empty_table()
    if (is_requested('Related Parties')) {
      .parse_json_fundraising_safe <-
        purrr::possibly(.parse_json_fundraising, tibble())

      fundraising_df <-
        url_df$urlJSON[[4]] %>%
        .parse_json_fundraising_safe(nest_data = nest_data,
                                     return_message = return_message) %>%
        add_entity_name()
    }

    traders_df <- empty_table()
    if (is_requested('Traders')) {
      .parse_json_traders_safe <-
        purrr::possibly(.parse_json_traders, tibble())

      traders_df <-
        url_df$urlJSON[[5]] %>%
        .parse_json_traders_safe(return_message = return_message) %>%
        add_entity_name()
    }

    clevel_df <- empty_table()
    if (is_requested('C Level')) {
      .parse_json_clevel_safe <-
        purrr::possibly(.parse_json_clevel, tibble())

      clevel_df <-
        url_df$urlJSON[[6]] %>%
        .parse_json_clevel_safe(return_message = return_message) %>%
        add_entity_name()
    }

    mda_df <- empty_table()
    if (is_requested('MDA')) {
      .parse_json_mda_safe <-
        purrr::possibly(.parse_json_mda, tibble())

      mda_df <-
        url_df$urlJSON[[7]] %>%
        .parse_json_mda_safe(return_message = return_message) %>%
        add_entity_name()
    }

    owners_df <- empty_table()
    if (is_requested('Owners')) {
      .parse_json_owners_safe <-
        purrr::possibly(.parse_json_owners, tibble())

      owners_df <-
        url_df$urlJSON[[8]] %>%
        .parse_json_owners_safe(nest_data = nest_data,
                                return_message = return_message)

      if ('idTypeFilerOwner' %in% names(owners_df)) {
        owners_df <-
          owners_df %>%
          left_join(.filer_type_df()) %>%
          select(idCIK:nameEntityOwner, typeFilerOwner, everything()) %>%
          suppressMessages()
      }

      if (owners_df %>% nrow() > 0) {
        owners_df <-
          owners_df %>%
          add_entity_name() %>%
          select(-dplyr::matches("dateiso"))
      }
    }

    cik_filing_df <- empty_table()
    if (is_requested('CIK Filings')) {
      .parse_cik_filings_safe <-
        purrr::possibly(.parse_cik_filings, tibble())

      cik_filing_df <-
        .parse_cik_filings_safe(cik = cik, return_message = return_message)
    }

    insider_trade_df <- empty_table()
    if (is_requested('Insider Trades')) {
      parse_insider_trades_safe <-
        purrr::possibly(.parse_insider_trades, tibble())

      insider_trade_df <-
        parse_insider_trades_safe(cik = cik,
                                  nest_data = nest_data,
                                  return_message = return_message) %>%
        add_entity_name()
    }

    sub_df <- empty_table()
    if (is_requested('Subsidiaries')) {
      .parse_json_subsidiaries_safe <-
        purrr::possibly(.parse_json_subsidiaries, tibble())

      sub_df <-
        url_df$urlJSON[[9]] %>%
        .parse_json_subsidiaries_safe(return_message = return_message) %>%
        add_entity_name()
    }

    if ('nameEntity' %in% names(general_df)) {
      nameEntity <-
        general_df$nameEntity %>%
        str_to_upper()
    } else {
      nameEntity <-
        NA
    }

    all_data <-
      tibble(
        idCIK = cik,
        nameEntity = nameEntity,
        nameTable = c(
          'General',
          'CIK Filings',
          'Filings',
          'Private Offerings',
          'Related Parties',
          'Traders',
          'C Level',
          'MDA',
          'Owners',
          'Insider Trades',
          'Subsidiaries'
        ),
        dataTable = list(
          general_df,
          cik_filing_df,
          filing_df,
          private_df,
          fundraising_df,
          traders_df,
          clevel_df,
          mda_df,
          owners_df,
          insider_trade_df,
          sub_df
        )
      )

    if (return_message) {
      paste0("\nParsed SEC Private Filing Data for CIK: ",
             cik,
             ' - ',
             nameEntity,
             "\n") %>%
        cat(fill = TRUE)
    }

    # Tables with at most one column are placeholders or failed parses.
    all_data <-
      all_data %>%
      mutate(countCols = dataTable %>% purrr::map_dbl(ncol)) %>%
      filter(countCols > 1) %>%
      suppressWarnings() %>%
      select(-dplyr::matches("countCols"))

    all_data
  }

#' SEC filer
#'
#' This function imports data
#' for a specified SEC filing entity.  An
#' SEC filing entity can be a person, public company or
#' private filer.  This function requires that the entity has a
#' Central Index Key [CIK].
#'
#' The function acquires information for the
#' specified tables and will auto parse forms if the filer has them and
#' the user activates these parameters.
#'
#' @param entity_names vector names to search
#' @param tickers character vector of ticker symbols to search
#' @param ciks numeric vector of CIKs
#' @param tables tables to include if they exist; any of
#' \code{NULL, General, CIK Filings, Filings, Private Offerings, Related Parties, Traders, C Level, MDA, Owners, Insider Trades, Trades} \itemize{
#' \item \code{NULL}: selects all tables
#' \item \code{General}: general information about the filer
#' \item \code{CIK Filings}: summarised filings for a CIK
#' \item \code{Filings}: summarised filings for an entity, slightly different than \code{CIK Filings}
#' \item \code{Private Offerings}: parses any private offerings
#' \item \code{Related Parties}:  parses any related parties [people]
#' \item \code{Traders}: parses major traders
#' \item \code{C Level}: parses information about executives
#' \item \code{MDA}: parses text from company 10-K Management Discussion and Analysis [MDA] section
#' \item \code{Owners}: parses information about major owners
#' \item \code{Insider Trades}: parses insider trade information
#' \item \code{Trades}: parses all trade information
#' }
#' @param parse_all_filing_url_data \code{TRUE} parses every SEC filing link
#' @param parse_xbrl \code{TRUE} parse XBRL for public companies, data starts in 2009
#' @param parse_subsidiaries \code{TRUE} parse all filer subsidiaries (default)
#' @param parse_13F \code{TRUE} parse \href{https://en.wikipedia.org/wiki/Form_13F}{13F's} for institutional managers
#' @param parse_asset_files \code{TRUE} parses ABS XML for \href{https://www.sec.gov/info/edgar/specifications/absxml.htm}{ABS Asset Data}
#' filing entities (default)
#' @param parse_small_offerings \code{TRUE} parses \href{https://www.sec.gov/info/smallbus/secg/rccomplianceguide-051316.htm}{Regulation CrowdFunding}
#' Form 1-A data if any exists for a filer (default)
#' @param nest_data return a nested data frame \code{TRUE, FALSE}
#' @param assign_to_environment \code{TRUE} assigns individual data frames to your environment
#' @param return_message \code{TRUE} return a message after data import
#' @import dplyr tidyr purrr stringr formattable readr lubridate XBRL curl jsonlite lazyeval
#' @importFrom jsonlite fromJSON
#' @export
#' @return where \code{nest_data} is \code{TRUE} a nested tibble by asset,
#' where \code{nest_data} is \code{FALSE} a tibble
#' @family SEC
#' @family Rank and Filed
#' @family XBRL
#' @family entity search
#' @family fund search
#' @export
#'
#' @examples
#' \dontrun{
#' sec_filer(entity_names = 'HLT Holdco', tickers = c('FB'),
#' nest_data = TRUE, parse_subsidiaries = TRUE, parse_all_filing_url_data = TRUE,
#' parse_13F = TRUE, assign_to_environment = TRUE,
#' return_message = TRUE)
#'
#' ## Small Asset Filer Example
#'
#' ## ABS Example
#'
#' #XBRL Example
#'
#'}
sec_filer <-
  function(entity_names = NULL,
           tickers = NULL,
           ciks = NULL,
           tables = NULL,
           nest_data = FALSE,
           parse_all_filing_url_data = FALSE,
           parse_xbrl = FALSE,
           parse_subsidiaries = FALSE,
           parse_13F = FALSE,
           parse_asset_files = FALSE,
           parse_small_offerings = FALSE,
           parse_complete_text_filings = FALSE,
           parse_form_d = FALSE,
           parse_form_3_4s = FALSE,
           assign_to_environment = TRUE,
           return_message = TRUE) {
    # Workflow:
    #   1. resolve entity names to CIKs and merge them with any supplied CIKs
    #   2. parse every CIK, then every ticker, into one table of
    #      (idCIK, nameEntity, nameTable, dataTable) rows
    #   3. optionally keep only the requested `tables`
    #   4. when filings are present, optionally parse subsidiary exhibits and
    #      the EDGAR-derived tables selected by the parse_* flags
    #   5. optionally assign one data frame per table into the global
    #      environment (names start with "dataFiler")
    # Returns the combined table, or an empty tibble when nothing was found.
    has_entities <- !purrr::is_null(entity_names)
    has_ciks <- !purrr::is_null(ciks)
    has_tickers <- !purrr::is_null(tickers)
    has_tables <- !purrr::is_null(tables)

    if (!has_ciks && !has_entities && !has_tickers) {
      stop("Please enter a CIK, ticker, or an entity name")
    }

    all_ciks <-
      c()

    # Explicit sentinel instead of exists('all_data'): exists() also searches
    # enclosing environments, so an unrelated global `all_data` would be
    # picked up by mistake.
    all_data <- NULL

    if (has_entities) {
      sec_filing_entities_safe <-
        purrr::possibly(sec_filing_entities, tibble())

      search_df <-
        entity_names %>%
        sec_filing_entities_safe(return_message = return_message)

      if (nrow(search_df) > 0) {
        all_ciks <-
          all_ciks %>%
          append(search_df$idCIK)
      }
    }

    if (has_ciks) {
      all_ciks <-
        all_ciks %>%
        append(ciks)
    }

    .parse_cik_data_safe <-
      possibly(.parse_cik_data, NULL)

    if (length(all_ciks) > 0) {
      all_data <-
        all_ciks %>%
        sort() %>%
        future_map_dfr(function(x) {
          .parse_cik_data_safe(
            tables = tables,
            nest_data = nest_data,
            cik = x,
            return_message = return_message
          )
        }) %>%
        mutate(
          urlRankAndFiled =
            paste0('http://rankandfiled.com/#/filers/', idCIK, '/filings')
        ) %>%
        select(idCIK, nameEntity, urlRankAndFiled, nameTable, dataTable) %>%
        distinct() %>%
        suppressWarnings()
    }

    if (has_tickers) {
      .parse_ticker_data_safe <-
        purrr::possibly(.parse_ticker_data, tibble())

      # Always use the safe wrapper and always forward nest_data, whether or
      # not CIK data was already collected.
      all_ticker_data <-
        tickers %>%
        future_map_dfr(function(x) {
          .parse_ticker_data_safe(
            ticker = x,
            nest_data = nest_data,
            tables = tables,
            return_message = return_message
          )
        }) %>%
        suppressWarnings()

      if (is.null(all_data)) {
        all_data <- all_ticker_data
      } else {
        all_data <-
          all_data %>%
          bind_rows(all_ticker_data)
      }
    }

    if (has_tables) {
      table_options <-
        c(
          'General',
          'CIK Filings',
          'Filings',
          'Private Offerings',
          'Related Parties',
          'Traders',
          'C Level',
          'MDA',
          'Owners',
          'Insider Trades',
          'Trades'
        )
      table_names <-
        tables %>% str_to_lower() %>% paste0(collapse = "|")

      wrong_table <-
        table_options %>% str_to_lower() %>% str_count(table_names) %>% sum() == 0

      if (wrong_table) {
        stop("Sorry tables can only be:\n" %>% paste0(paste0(table_options, collapse = '\n')))
      }
    }

    # Nothing retrieved: bail out before any step that needs nameTable/idCIK.
    if (is.null(all_data) || nrow(all_data) == 0) {
      return(tibble())
    }

    if (has_tables) {
      all_data <-
        all_data %>%
        mutate(table = nameTable %>% str_to_lower()) %>%
        filter(table %>% str_detect(table_names)) %>%
        select(-table)
    }

    missing_ciks <-
      all_ciks[!all_ciks %in% all_data$idCIK]

    if (length(missing_ciks) > 0) {
      cat(paste0("Missing ", paste(missing_ciks, collapse = ', ')), fill = TRUE)
    }

    all_data <-
      all_data %>%
      select(-dplyr::matches("urlRankAndFiled"))

    has_filings <-
      any(c('CIK Filings', 'Filings') %in% all_data$nameTable)

    if (has_filings) {
      filing_df <-
        all_data %>%
        filter(nameTable %in% c('Filings', 'CIK Filings')) %>%
        select(dataTable) %>%
        unnest() %>%
        distinct()

      # URL columns are lower-cased, then the 'Archives' path segment is
      # restored with its capital letter.
      filing_df <-
        filing_df %>%
        mutate_at(filing_df %>% select(dplyr::matches("^url")) %>% names(),
                  .funs = str_to_lower)

      # The original pattern "url^[A-Z]" could never match, so this step was a
      # silent no-op; "^url[A-Z]" targets the url* columns as intended.
      filing_df <-
        filing_df %>%
        mutate_at(filing_df %>% select(dplyr::matches("^url[A-Z]")) %>% names(),
                  .funs = function(x) {
                    str_replace_all(x, 'archives', 'Archives')
                  })

      filing_df <-
        filing_df %>%
        mutate(
          urlSECFilingDirectory = urlSECFilingDirectory %>% gsub('archives', 'Archives', .),
          urlSEC = urlSEC %>% gsub('archives', 'Archives', .)
        )

      has_subsidiaries <-
        parse_subsidiaries &&
        (filing_df %>%
           filter(typeFiling == "SUBSIDIARIES OF THE REGISTRANT") %>%
           nrow() > 0)

      if (has_subsidiaries) {
        parse_sec_subsidiary_url_safe <-
          purrr::possibly(.parse_sec_subsidiary_url, tibble())

        has_list <-
          filing_df %>%
          filter(typeFiling == "LIST OF SUBSIDIARIES") %>%
          nrow() > 0

        sub_url_df <-
          filing_df %>%
          filter(
            typeFiling %in% c(
              "SUBSIDIARIES OF THE REGISTRANT",
              "SUBSIDIARIES OF HOLDING COMPANY"
            )
          ) %>%
          select(dateFiling, nameEntity, urlSEC) %>%
          distinct()

        if (has_list) {
          sub_url_list_df <-
            filing_df %>%
            filter(
              typeFiling %>% str_detect(
                "LIST OF SUBSIDIARIES|LIST OF SIGNIFICANT SUBSIDIARIES|LIST OF SIGNIFCANT"
              )
            ) %>%
            select(dateFiling, nameEntity, urlSEC) %>%
            distinct()

          # sub_url_df is always defined above, so the list rows are simply
          # prepended to it.
          sub_url_df <-
            sub_url_list_df %>%
            bind_rows(sub_url_df)
        }

        sub_df <-
          sub_url_df %>%
          arrange(dateFiling) %>%
          .$urlSEC %>%
          future_map_dfr(function(x) {
            parse_sec_subsidiary_url_safe(url = x, return_message = return_message)
          }) %>%
          suppressWarnings()

        if (sub_df %>% nrow() > 0) {
          sub_df <-
            sub_df %>%
            select(-dplyr::matches("X|date")) %>%
            filter(
              !nameSubsidiary %in% c(
                '(I)',
                '(II)',
                '(III)',
                '(IV)',
                '(V)',
                '(VI)',
                '(VII)',
                '(VIII)',
                '(IX)',
                '(X)',
                'PART A'
              )
            ) %>%
            left_join(sub_url_df) %>%
            select(idCIK, dateFiling, everything()) %>%
            suppressMessages() %>%
            distinct()

          # A subsidiary counts as active when it appears in the most recent
          # subsidiary filing seen across all parsed exhibits.
          active_date_df <-
            sub_df %>%
            group_by(nameSubsidiary) %>%
            summarise(
              dateFirstFiled = min(dateFiling, na.rm = TRUE),
              dateLastFiled = max(dateFiling, na.rm = TRUE),
              isActiveSubsidiary =
                dateLastFiled == sub_df$dateFiling %>% max(na.rm = TRUE)
            ) %>%
            ungroup()

          sub_df <-
            sub_df %>%
            left_join(active_date_df) %>%
            left_join(sub_url_df) %>%
            suppressMessages()

          sub_df <-
            sub_df %>%
            mutate(nameSubsidiaryRF = nameSubsidiary %>% str_replace_all('\\,|\\.', '')) %>%
            select(idCIK, nameEntity, dateFiling, everything()) %>%
            suppressMessages()

          has_sub_df <-
            'Subsidiaries' %in% all_data$nameTable

          if (has_sub_df) {
            ad_sub_df <-
              all_data %>%
              filter(nameTable == 'Subsidiaries') %>%
              select(dataTable) %>%
              unnest()

            # Carry over the ownership percentage from the existing
            # Subsidiaries table when it is available.
            if ('pctSubsidiaryOwned' %in% names(ad_sub_df)) {
              sub_df <-
                sub_df %>%
                left_join(
                  ad_sub_df %>%
                    select(nameSubsidiaryRF = nameSubsidiary, pctSubsidiaryOwned) %>%
                    distinct()
                ) %>%
                suppressMessages() %>%
                select(-nameSubsidiaryRF)
            }
          }

          if (nest_data) {
            sub_df <-
              sub_df %>%
              nest(-c(dateFiling, idCIK, nameEntity), .key = dataSubsidiaries)
          }

          a_sub_df <-
            sub_df %>%
            group_by(idCIK, nameEntity) %>%
            nest(-c(idCIK, nameEntity), .key = dataTable) %>%
            ungroup() %>%
            mutate(nameTable = 'Subsidiaries')

          # Replace any existing Subsidiaries table with the parsed one.
          all_data <-
            all_data %>%
            filter(!nameTable == 'Subsidiaries') %>%
            bind_rows(a_sub_df)
        }
      }

      parse_for_tables_rf_safe <-
        purrr::possibly(.parse_for_tables_rf, tibble())

      tables_edgar <-
        parse_for_tables_rf_safe(
          filing_df = filing_df,
          parse_complete_text_filings = parse_complete_text_filings,
          parse_form_d = parse_form_d,
          parse_13F = parse_13F,
          parse_small_offerings = parse_small_offerings,
          parse_form_3_4s = parse_form_3_4s,
          parse_asset_files = parse_asset_files,
          parse_xbrl = parse_xbrl
        )
      has_edgar_tables <-
        tables_edgar %>% nrow() > 0

      if (has_edgar_tables) {
        all_data <-
          all_data %>%
          nest(-nameTable, .key = dataTable) %>%
          bind_rows(tables_edgar)
      }
    }

    if (assign_to_environment) {
      table_name_df <-
        all_data %>%
        select(nameTable) %>%
        distinct() %>%
        mutate(
          nameDF =
            paste0('dataFiler', nameTable %>% str_replace_all('\\ ', ''))
        )

      # seq_len() so an empty table list yields no iterations (1:0 would
      # iterate over 1 and 0).
      seq_len(nrow(table_name_df)) %>%
        walk(function(x) {
          df_name <-
            table_name_df %>% slice(x) %>% .$nameDF
          df_name %>% cat(fill = TRUE)
          df_data <-
            all_data %>%
            filter(nameTable == table_name_df$nameTable[[x]]) %>%
            select(dplyr::matches(c('idCIK|nameEntity|dataTable'))) %>%
            unnest() %>%
            suppressWarnings() %>%
            remove_duplicate_columns()

          has_unnest2 <-
            names(df_data) %>% str_detect('data') %>% sum(na.rm = TRUE) > 1

          if (has_unnest2) {
            # Several nested data* columns: each becomes its own data frame
            # named <df_name><column name without 'data'>.
            base_names <-
              df_data %>% remove_duplicate_columns() %>% dplyr::select(-dplyr::matches("data")) %>% names()

            df_data_names <-
              names(df_data)[names(df_data) %>% str_detect('data')]

            for (i in seq_along(df_data_names)) {
              df_data_name <-
                df_data_names[[i]]
              table <-
                df_data %>%
                select(one_of(c(base_names, df_data_name))) %>%
                remove_duplicate_columns()
              is_null_col <-
                table[, df_data_name] %>% magrittr::extract2(1) %>% map_lgl(is_null)

              table <-
                table %>%
                mutate(is_null_col) %>%
                filter(!is_null_col) %>%
                unnest() %>%
                remove_duplicate_columns() %>%
                select(which(colMeans(is.na(.)) < 1)) %>%
                # tidy_column_formats() %>%
                select(-dplyr::matches('is_null_col')) %>%
                distinct()

              df_table_name <-
                paste0(df_name, df_data_name %>% str_replace_all('data', ''))
              assign(x = df_table_name,
                     eval(table),
                     envir = .GlobalEnv)
            }

          } else {
            has_unnest <-
              df_data %>% names() %>% str_detect('data') %>% sum(na.rm = TRUE) > 0
            if (has_unnest) {
              if (df_name %>% str_detect("General")) {
                table <-
                  df_data %>%
                  remove_duplicate_columns() %>%
                  select(-dplyr::matches("data")) %>%
                  # tidy_column_formats() %>%
                  select(which(colMeans(is.na(.)) < 1)) %>%
                  distinct()
                assign(x = df_name,
                       eval(table),
                       envir = .GlobalEnv)
              }
              if (df_name %in% 'dataFilerTextFilings') {
                table <-
                  df_data %>%
                  unnest() %>%
                  select(which(colMeans(is.na(.)) < 1)) %>%
                  tidy_column_formats() %>%
                  distinct()
                assign(x = df_name,
                       eval(table),
                       envir = .GlobalEnv)
              }
              if (df_name %in% 'dataFilerFilingDirectories') {
                table <-
                  df_data %>%
                  select(-dplyr::matches('data')) %>%
                  filter(!idCIK %>% is.na()) %>%
                  select(which(colMeans(is.na(.)) < 1)) %>%
                  # tidy_column_formats() %>%
                  distinct()
                assign(x = df_name,
                       eval(table),
                       envir = .GlobalEnv)
              }

              other <-
                (!df_name %>% str_detect("General")) &&
                (!df_name %in% c('dataFilerFilingDirectories', 'dataFilerTextFilings'))

              if (other) {
                df_data <-
                  df_data %>%
                  remove_duplicate_columns() %>%
                  # select(dplyr::matches("data")) %>%
                  unnest()

                # Keep only the first occurrence of each column name.
                select_cols <-
                  tibble(nameData = names(df_data)) %>%
                  mutate(idColumn = seq_len(n())) %>%
                  group_by(nameData) %>%
                  mutate(countColumn = seq_len(n())) %>%
                  ungroup() %>%
                  filter(countColumn == min(countColumn)) %>%
                  .$idColumn

                df_data <-
                  df_data[, select_cols]

                table <-
                  df_data %>%
                  select(which(colMeans(is.na(.)) < 1)) %>%
                  # tidy_column_formats() %>%
                  distinct()
                assign(x = df_name,
                       eval(table),
                       envir = .GlobalEnv)
              }
            } else {
              table <-
                df_data %>%
                select(which(colMeans(is.na(.)) < 1)) %>%
                # tidy_column_formats() %>%
                distinct()
              assign(x = df_name,
                     eval(table),
                     envir = .GlobalEnv)
            }
          }
        })
    }

    return(all_data)
  }


# insider -----------------------------------------------------------------

.parse_json_general_insider <-
  function(cik = 1354879,
           nest_data = TRUE,
           return_message = TRUE) {
    # Downloads the Rank and Filed "general" JSON for an insider CIK and
    # flattens it into one tibble: scalar fields first, then the optional
    # `filer` and `companies` sections joined on. Returns an empty tibble
    # when the URL is not reachable.
    url <-
      paste0('http://rankandfiled.com/data/insider/', cik, '/general')
    if (!url %>% httr::url_ok()) {
      return(tibble())
    }

    data <-
      url %>%
      jsonlite::fromJSON() %>%
      .[['insider']]

    # Names of the non-nested fields. An exact %in% test is used (as in
    # .parse_json_fund_general); str_detect() with a two-element pattern
    # recycled the patterns across rows and tested each row against only one.
    general_cols <-
      data %>% future_map_dfr(class) %>%
      gather(item, value) %>%
      filter(!value %in% c('list', 'data.frame')) %>%
      .$item %>%
      suppressWarnings()

    general_df <-
      data %>%
      data.frame(stringsAsFactors = FALSE) %>%
      dplyr::select(one_of(general_cols)) %>%
      .resolve_name_df() %>%
      distinct()

    has_filer <-
      'filer' %in% names(data)

    if (has_filer) {
      filing_df <-
        data$filer %>%
        flatten_df() %>%
        .resolve_name_df()

      if ('name' %in% names(filing_df)) {
        filing_df <-
          filing_df %>%
          select(-name)
      }

      if ('detailsOwnedBy' %in% names(filing_df)) {
        filing_df <-
          filing_df %>%
          dplyr::rename(detailsOwns = detailsOwnedBy)
      }

      if ('detailsOwns' %in% names(filing_df)) {
        # detailsOwns is a '|'-separated string of
        # ticker:company_role#date entries; each is split into columns and
        # spread wide, with a numeric suffix for the 2nd, 3rd, ... entry.
        detail_df <-
          seq_along(filing_df$detailsOwns) %>%
          future_map_dfr(function(x) {
            detail_value <-
              filing_df$detailsOwns[[x]]

            if (detail_value %>% is.na()) {
              df <-
                tibble(idRow = x, nameCompanyOwns = NA)
              if (nest_data) {
                df <-
                  df %>%
                  nest(-idRow, .key = dataInsiderCompanies)
              }
              return(df)
            }

            values <-
              detail_value %>% str_replace('\\|', '') %>%
              str_split('\\|') %>%
              flatten_chr()

            df_data <-
              tibble(value = values) %>%
              tidyr::separate(value,
                              into = c('idTickerOwns', 'other'),
                              sep = '\\:') %>%
              tidyr::separate(other,
                              into = c('nameCompanyOwns', 'other'),
                              sep = '\\_') %>%
              tidyr::separate(other,
                              into = c('roleOwner', 'dateOwner'),
                              sep = '\\#') %>%
              mutate(nameCompanyOwns = nameCompanyOwns %>% str_to_upper(),
                     idRow = x) %>%
              gather(item, value, -idRow, na.rm = TRUE) %>%
              group_by(item) %>%
              mutate(count = seq_len(n()) - 1) %>%
              ungroup() %>%
              arrange((count)) %>%
              mutate(item = ifelse(count == 0, item, paste0(item, count))) %>%
              select(-count)

            column_order <-
              c('idRow', df_data$item)

            df_data <-
              df_data %>%
              spread(item, value) %>%
              select(one_of(column_order))

            if (nest_data) {
              df_data <-
                df_data %>%
                nest(-idRow, .key = dataInsiderCompanies)
            }

            return(df_data)
          }) %>%
          suppressWarnings()

        detail_df <-
          detail_df %>%
          mutate_at(.vars = detail_df %>% select(dplyr::matches("date")) %>% names(),
                    .funs = ymd) %>%
          suppressWarnings()

        filing_df <-
          filing_df %>%
          mutate(idRow = seq_len(n())) %>%
          select(-detailsOwns) %>%
          left_join(detail_df) %>%
          select(-idRow) %>%
          suppressMessages()
      }

      general_df <-
        general_df %>%
        left_join(filing_df) %>%
        suppressMessages()
    }

    has_companies <-
      'companies' %in% names(data)

    if (has_companies) {
      companies_df <-
        data$companies %>%
        as_tibble() %>%
        .resolve_name_df()

      # Wide version of the company fields without status history; used as
      # the result when no status_history column is present.
      company_name_df <-
        companies_df %>%
        select(-dplyr::matches("status_history")) %>%
        gather(item, value, -c(idCIK, nameFiler)) %>%
        group_by(item) %>%
        mutate(countItem = seq_len(n()) - 1) %>%
        ungroup() %>%
        mutate(item = ifelse(countItem == 0, item, item %>% paste0(countItem))) %>%
        select(-countItem) %>%
        suppressWarnings() %>%
        suppressMessages()

      col_order <-
        c('idCIK', 'nameFiler', company_name_df$item)

      company_name_df <-
        company_name_df %>%
        spread(item, value) %>%
        select(one_of(col_order))

      company_name_df <-
        company_name_df %>%
        mutate_at(company_name_df %>% select(dplyr::matches("idCIK")) %>% names(),
                  .funs = as.numeric)

      companies_df <-
        companies_df %>%
        mutate(idRow = seq_len(n()))

      if ('status_history' %in% names(companies_df)) {
        # One row per company: the status history entries are renamed to the
        # package's column names and spread wide with numeric suffixes.
        status_df <-
          seq_along(companies_df$status_history) %>%
          future_map_dfr(function(x) {
            df <-
              companies_df$status_history[[x]] %>%
              as_tibble() %>%
              mutate(idRow = x) %>%
              select(-dplyr::matches("other|pair_id")) %>%
              gather(item, value, -idRow) %>%
              left_join(tibble(
                item = c('date', 'officer', 'title', 'ten_percent', 'director'),
                nameItem = c(
                  'dateAppointment',
                  'isOfficer',
                  'titleOfficer',
                  'is10PercentOwner',
                  'isDirector'
                )
              )) %>%
              select(-item) %>%
              group_by(nameItem) %>%
              mutate(countItem = seq_len(n()) - 1) %>%
              ungroup() %>%
              mutate(item = ifelse(countItem == 0, nameItem, nameItem %>% paste0(countItem))) %>%
              select(idRow, item, value) %>%
              spread(item, value) %>%
              suppressMessages() %>%
              suppressWarnings()
            return(df)
          })

        # Dates are parsed and then converted back to character so they can
        # be gathered together with the other (character) values below.
        status_df <-
          status_df %>%
          mutate_at(status_df %>% select(dplyr::matches("date")) %>% names(),
                    .funs = lubridate::ymd) %>%
          mutate_at(status_df %>% select(dplyr::matches("is")) %>% names(),
                    .funs = as.logical) %>%
          mutate_at(status_df %>% select(dplyr::matches("date")) %>% names(),
                    .funs = as.character)

        companies_df <-
          companies_df %>%
          select(-dplyr::matches("status")) %>%
          left_join(status_df) %>%
          suppressWarnings() %>%
          suppressMessages() %>%
          gather(item, value, -c(idCIK, nameFiler, idRow)) %>%
          group_by(item, idRow) %>%
          mutate(countItem = seq_len(n()) - 1) %>%
          ungroup() %>%
          mutate(item = ifelse(countItem == 0, item, item %>% paste0(countItem))) %>%
          select(-countItem) %>%
          suppressWarnings()

        col_order <-
          c('idCIK', 'nameFiler', companies_df$item)

        companies_df <-
          companies_df %>%
          spread(item, value) %>%
          select(one_of(col_order)) %>%
          suppressWarnings()

        companies_df <-
          companies_df %>%
          mutate_at(status_df %>% select(dplyr::matches("date")) %>% names(),
                    .funs = lubridate::ymd) %>%
          mutate_at(status_df %>% select(dplyr::matches("^is|^has")) %>% names(),
                    .funs = as.logical)

      } else {
        companies_df <-
          company_name_df
      }

      if (nest_data) {
        companies_df <-
          companies_df %>%
          mutate(idRow = seq_len(n())) %>%
          nest(-c(idRow, idCIK), .key = dataDetailsCompaniesOwned) %>%
          as_tibble()
      }

      general_df <-
        general_df %>%
        mutate(idRow = seq_len(n())) %>%
        left_join(companies_df) %>%
        select(-idRow) %>%
        suppressMessages()

    }

    general_df <-
      general_df %>%
      mutate(urlJSONGeneral = url)

    if ('typeCompany' %in% names(general_df)) {
      general_df <-
        general_df %>%
        dplyr::rename(typeFiler = typeCompany)
    }

    if (return_message) {
      cat(paste0("Parsed: ", url), fill = TRUE)
    }

    return(general_df)


  }

.parse_insider_trade_json_url <-
  function(url = "http://rankandfiled.com/data/insider/1070844/trades?start=0",
           return_message = TRUE) {
    # Parses one page of Rank and Filed insider-trade JSON. Each trade arrives
    # as a single '*'-delimited string that is split into named columns,
    # typed, and joined to the insider / transaction lookup tables.
    # Returns an empty tibble when the URL is unreachable or has no trades.
    if (!url %>% httr::url_ok()) {
      return(tibble())
    }

    json_data <-
      url %>%
      jsonlite::fromJSON()

    # Without this guard an empty page made max() return -Inf and the column
    # sequence below fail.
    if (NROW(json_data$trades) == 0) {
      return(tibble())
    }

    # NOTE(review): this changes a global option and is deliberately not
    # restored here; .parse_insider_trades pastes numeric CIKs into URLs after
    # calling this function and may depend on it - confirm before restoring.
    options(scipen = 9999)

    trade_df <-
      json_data$trades %>%
      as_tibble() %>%
      dplyr::rename(dateTrade = date) %>%
      mutate(dateTrade = dateTrade %>% lubridate::ymd())

    # Number of fields = most '*' separators found in any trade string, plus 1.
    count_columns <-
      max(str_count(trade_df$trade, '\\*')) + 1

    column_names <-
      paste0("X", seq_len(count_columns))

    trade_df <-
      trade_df %>%
      separate(trade, column_names, sep = '\\*') %>%
      suppressWarnings()

    # "X10" and "X17" are placeholders for fields that are dropped below.
    trade_df_names <-
      c(
        'dateTrade',
        "idCIK",
        "idCIKOwns",
        "idInsiderType",
        "countSharesOwned",
        "descriptionOption",
        "idTypeInsiderTransaction",
        "amountPrice",
        "countShares",
        "idInsiderTransaction",
        "X10",
        "detailOwnershipIndirect",
        "priceExcercised",
        "dateOptionExcercisable",
        "dateOptionExpiry",
        "countSharesOptions",
        "typeSecurityOption",
        "X17"
      )

    # matches() ignores case by default, so the previous pattern "X" also
    # removed priceExcercised, dateOptionExcercisable and dateOptionExpiry.
    # Only the X<n> placeholders are removed now.
    trade_df <-
      trade_df %>%
      purrr::set_names((trade_df_names)[1:ncol(trade_df)]) %>%
      select(-dplyr::matches("^X[0-9]+$", ignore.case = FALSE))

    trade_df <-
      trade_df %>%
      mutate_at(.vars =
                  trade_df %>% select(dplyr::matches("date")) %>% names(),
                .funs = lubridate::ymd) %>%
      mutate_at(.vars =
                  trade_df %>% select(dplyr::matches("idCIK|count|amount|price")) %>% names(),
                .funs = function(x) {
                  readr::parse_number(as.character(x))
                }) %>%
      left_join(tibble(
        idInsiderType = c("D", "ND"),
        typeInsider = c("Director", "Non-Director")
      )) %>%
      left_join(get_insider_code_df()) %>%
      left_join(
        tibble(
          idTypeInsiderTransaction = c("A", "D", "None"),
          typeInsiderTransaction = c('Purchase', 'Sale', 'None'),
          isBought = c(TRUE, FALSE, NA)
        )
      ) %>%
      suppressMessages() %>%
      suppressWarnings()

    # Sales get a negative share count; rows whose isBought is NA yield NA.
    trade_df <-
      trade_df %>%
      mutate(
        countShares = ifelse(isBought == TRUE, countShares, -countShares),
        amountTransaction = countShares * amountPrice,
        urlJSON = url
      )


    if (return_message) {
      cat(paste0("Parsed: ", url), fill = TRUE)
    }

    return(trade_df)

  }

.parse_insider_trades <-
  function(cik = 1070844,
           nest_data = TRUE,
           return_message = TRUE) {
    # Collects every insider-trade page for one insider CIK, then attaches
    # the name / ticker of each company the insider traded in.
    # Returns an empty tibble when the insider or its trades cannot be parsed.
    general_df <-
      .parse_json_general_insider(cik = cik,
                                  nest_data = nest_data,
                                  return_message = return_message)

    if (nrow(general_df) == 0) {
      return(tibble())
    }

    cik <-
      general_df$idCIK

    insider <-
      general_df$nameEntity %>%
      str_to_upper()

    # Pages are requested in steps of 50. ceiling() keeps the final partial
    # page, which integer division dropped (e.g. 120 trades needs 3 pages).
    count_pages <-
      ceiling(general_df$countTrades / 50)

    trade_urls <-
      paste0(
        'http://rankandfiled.com/data/insider/',
        cik,
        '/trades?start=',
        seq(0, by = 50, length.out = count_pages)
      )

    parse_insider_trade_json_url_safe <-
      purrr::possibly(.parse_insider_trade_json_url, tibble())

    all_data <-
      trade_urls %>%
      future_map_dfr(function(x) {
        parse_insider_trade_json_url_safe(url = x, return_message = return_message)
      }) %>%
      distinct()

    # With the safe wrapper a failed page yields no rows; stop here rather
    # than building company URLs from a missing column.
    if (nrow(all_data) == 0) {
      return(all_data)
    }

    ciks_owned <-
      all_data$idCIKOwns %>% unique()

    company_urls_general <-
      paste0('http://rankandfiled.com/data/filer/',
             ciks_owned,
             '/general')

    owned_company_df <-
      company_urls_general %>%
      future_map_dfr(function(x) {
        .parse_json_general_filing(url = x,
                                   return_message = return_message,
                                   nest_data = nest_data)
      })

    owned_df <-
      owned_company_df %>%
      select(dplyr::matches('idCIK|nameEntity|idTicker')) %>%
      select(-dplyr::matches("idCIKOwnedBy"))

    # Suffix the columns so they join on idCIKOwns and do not collide with
    # the insider's own identifiers.
    names(owned_df) <-
      names(owned_df) %>% paste0('Owns')

    all_data <-
      all_data %>%
      mutate(nameInsider = insider) %>%
      left_join(owned_df) %>%
      select(
        dateTrade,
        nameInsider,
        idCIK,
        nameEntityOwns,
        dplyr::matches('idCIKOwns|idTickerOwns'),
        everything()
      ) %>%
      suppressWarnings() %>%
      suppressMessages()

    all_data <-
      all_data %>%
      mutate_at(.vars = all_data %>% select(dplyr::matches("amount|price")) %>% names(),
                .funs = function(x) {
                  formattable::currency(x, digits = 2)
                }) %>%
      mutate_at(.vars = all_data %>% select(dplyr::matches("count")) %>% names(),
                .funs = function(x) {
                  formattable::comma(x, digits = 0)
                }) %>%
      mutate_if(is.numeric, as.numeric)

    if (return_message) {
      cat(
        paste0(
          "Parsed ",
          all_data %>% nrow() %>% formattable::comma(digits = 0),
          ' insider transactions for ',
          insider
        ),
        fill = TRUE
      )
    }
    return(all_data)
  }

.parse_insider_filings <-
  function(cik = 1070844,
           nest_data = TRUE,
           return_message = TRUE) {
    # Collects every filing page for one insider CIK and tags each row with
    # the insider's upper-cased name.
    # The argument was previously misspelled `return_messag` (it only worked
    # through partial matching) and hard-coded to TRUE.
    general_df <-
      .parse_json_general_insider(cik = cik,
                                  nest_data = nest_data,
                                  return_message = return_message)

    cik <-
      general_df$idCIK

    insider <-
      general_df$nameEntity %>%
      str_to_upper()

    # Pages are requested in steps of 50. ceiling() keeps the final partial
    # page, which integer division dropped.
    count_pages <-
      ceiling(general_df$countFilings / 50)

    filing_urls <-
      paste0(
        'http://rankandfiled.com/data/filer/',
        cik,
        '/all?start=',
        seq(0, by = 50, length.out = count_pages)
      )

    .parse_json_public_filers_safe <-
      purrr::possibly(.parse_json_public_filers, NULL)

    .all_filings <-
      filing_urls %>%
      future_map_dfr(function(x) {
        .parse_json_public_filers_safe(url = x, return_message = return_message)
      }) %>%
      distinct() %>%
      suppressWarnings() %>%
      mutate(nameInsider = insider) %>%
      select(idCIK, nameInsider, everything())

    if (return_message) {
      cat(
        paste0("Parsed ", .all_filings %>% nrow(), ' SEC Filings for ', insider),
        fill = TRUE
      )
    }

    return(.all_filings)

  }

# funds -------------------------------------------------------------------
.generate_fund_general_url <-
  function(cik = 1034621) {
    # Builds the Rank and Filed "general" JSON endpoint for a fund CIK and
    # returns it as a plain character value rather than a glue object.
    fund_url <- glue("http://rankandfiled.com/data/fund/{cik}/general")
    as.character(fund_url)
  }

.parse_json_fund_general <-
  function(cik = 1034621,
           return_message = TRUE) {
    # Downloads the Rank and Filed "general" JSON for a fund CIK and flattens
    # it into one tibble: scalar fields, then the optional `funds` and
    # `filer` sections joined on. Returns an empty tibble when the URL is not
    # reachable.
    fund_url <- .generate_fund_general_url(cik)

    url_is_ok <-
      fund_url %>% httr::url_ok() %>% suppressWarnings()

    if (!url_is_ok) {
      return(tibble())
    }

    fund_json <-
      fund_url %>%
      jsonlite::fromJSON()

    # Names of the fields that are neither lists nor data frames.
    scalar_cols <-
      fund_json %>% future_map_dfr(class) %>%
      gather(item, value) %>%
      filter(!value %in% (c('list', 'data.frame'))) %>%
      .$item %>%
      suppressWarnings()

    general_df <-
      fund_json %>%
      data.frame(stringsAsFactors = FALSE) %>%
      dplyr::select(one_of(scalar_cols)) %>%
      .resolve_name_df() %>%
      distinct() %>%
      select(-dplyr::matches("descriptionClasses"))

    # Optional section: individual funds, keyed back to the requested CIK.
    if ('funds' %in% names(fund_json)) {
      general_df <-
        general_df %>%
        left_join(fund_json$funds %>%
                    .resolve_name_df() %>%
                    mutate(idCIK = cik)) %>%
        suppressMessages()
    }

    # Optional section: the filer record, joined row-by-row via idRow.
    if ('filer' %in% names(fund_json)) {
      filer_tbl <-
        fund_json$filer %>%
        as_tibble()

      filer_tbl <-
        filer_tbl %>%
        .resolve_name_df()

      if (!'idCIK' %in% names(filer_tbl)) {
        filer_tbl <-
          filer_tbl %>%
          mutate(idCIK = cik)
      }

      if ('name' %in% names(filer_tbl)) {
        filer_tbl <-
          filer_tbl %>%
          mutate(nameEntity = nameEntity %>% stringr::str_to_upper()) %>%
          select(-name)
      }

      id_cols <-
        filer_tbl %>% select(dplyr::matches("idRF|idCIK")) %>% names()

      filer_tbl <-
        filer_tbl %>%
        mutate_at(id_cols,
                  funs(. %>% as.numeric()))

      # Only bring over filer columns that general_df does not already have,
      # plus the join key and the renamed filer CIK.
      join_cols <-
        c('idCIKFiler', 'idRow', names(filer_tbl)[!names(filer_tbl) %in% names(general_df)])

      general_df <-
        general_df %>%
        mutate(idRow = 1:n()) %>%
        left_join(
          filer_tbl %>%
            mutate(idRow = 1:n()) %>%
            dplyr::rename(idCIKFiler = idCIK) %>%
            select(one_of(join_cols))
        ) %>%
        select(-dplyr::matches("^object|idRow")) %>%
        distinct() %>%
        suppressMessages()

    }

    # Identifier and name columns lead the result.
    general_df <-
      general_df %>%
      select(idCIK,
             nameEntity,
             dplyr::matches("name"),
             dplyr::matches("id"),
             everything())

    if (return_message) {
      cat(paste0("Parsed: ", fund_url), fill = TRUE)
    }

    return(general_df)

  }


# for_table ---------------------------------------------------------------

# Parse the SEC filing directories referenced by `filing_df` into a table of
# named result tables.
#
# filing_df: data frame of filings; the code selects `dateFiling` and
#   `urlSECFilingDirectory` from it, and derives `typeFile` when absent.
# parse_complete_text_filings, parse_form_d, parse_13F, parse_small_offerings,
# parse_form_3_4s, parse_asset_files, parse_xbrl: logical switches choosing
#   which tables are attempted.
# nest_data, return_message: accepted for interface compatibility; not used
#   inside this function.
#
# Returns a tibble with `nameTable` and the list-column `dataTable`, keeping
# only tables that have at least one column. When every switch is FALSE an
# empty tibble is returned.
.parse_for_tables_rf <-
  function(filing_df,
           parse_complete_text_filings = TRUE,
           parse_form_d = TRUE,
           parse_13F = TRUE,
           parse_small_offerings = TRUE,
           parse_form_3_4s = TRUE,
           parse_asset_files = TRUE,
           parse_xbrl = TRUE,
           nest_data = TRUE,
           return_message = TRUE) {
    all_tables <-
      tibble()

    parse_all_filings <-
      any(
        parse_complete_text_filings,
        parse_form_d,
        parse_13F,
        parse_small_offerings,
        parse_form_3_4s,
        parse_asset_files,
        parse_xbrl
      )

    # Nothing requested: hand back the empty table rather than failing below
    # on a `dataTable` column that was never created.
    if (!parse_all_filings) {
      return(all_tables)
    }

    parse_form_data_safe <-
      purrr::possibly(.parse_form_data, tibble())
    sec_complete_filings_safe <-
      purrr::possibly(.sec_complete_filings, tibble())

    if (!"typeFile" %in% names(filing_df)) {
      filing_df <-
        filing_df %>%
        mutate(typeFile = ifelse(urlSECFilingDirectory %>% str_detect("htm"),
                                 "html", NA))
    }

    search_df <-
      filing_df %>%
      select(dateFiling,
             dplyr::matches("typeFile"),
             dplyr::matches("idForm"),
             urlSECFilingDirectory) %>%
      filter(!is.na(urlSECFilingDirectory)) %>%
      distinct()

    # One index parse per unique filing directory; flag XML form 3/4 files.
    df_all_filing_urls <-
      search_df$urlSECFilingDirectory %>%
      unique() %>%
      future_map_dfr(function(x) {
        .parse_sec_filing_index(urls = x)
      }) %>%
      mutate(isForm3_4 = typeForm %in% c("3", "4") & typeFile == "xml")

    all_tables <-
      all_tables %>%
      bind_rows(
        df_all_filing_urls %>%
          mutate(nameTable = "Filing Directories") %>%
          nest(-nameTable, .key = dataTable)
      )

    if (parse_complete_text_filings) {
      if (!"urlTextFilingFull" %in% names(df_all_filing_urls)) {
        df_all_filing_urls <-
          df_all_filing_urls %>%
          mutate(urlTextFilingFull = urlSECFilingDirectory %>% str_replace_all("-index.htm", ".txt"))
      }

      # Use the fault-tolerant wrapper so a failed download does not abort the
      # remaining tables.
      all_text_df <-
        sec_complete_filings_safe(urls = unique(df_all_filing_urls$urlTextFilingFull))

      # The fallback tibble has no idCIK to nest by, so skip the table then.
      if ("idCIK" %in% names(all_text_df)) {
        all_tables <-
          all_tables %>%
          bind_rows(tibble(
            nameTable = "Text Filings",
            dataTable = list(all_text_df %>% nest(-c(idCIK), .key = dataFilings))
          ))
      }
    }

    # Remaining tables share one shape: filter the filing index on a flag
    # column and store whatever the form parser returns. Order is preserved.
    form_tables <-
      list(
        list(enabled = parse_form_d, table = "FormDs", flag = "isFormD"),
        list(enabled = parse_13F, table = "13Fs", flag = "is13FFiling"),
        list(enabled = parse_small_offerings, table = "Small Offerings", flag = "hasSmallOfferingData"),
        list(enabled = parse_form_3_4s, table = "Form 3 and 4", flag = "isForm3_4"),
        list(enabled = parse_asset_files, table = "Asset Data", flag = "hasAssetFile"),
        list(enabled = parse_xbrl, table = "XBRL", flag = "isXBRLInstanceFile")
      )

    for (spec in form_tables) {
      if (!spec$enabled) {
        next
      }

      df_form <-
        parse_form_data_safe(df_all_filing_urls, filter_parameter = spec$flag)

      all_tables <-
        all_tables %>%
        bind_rows(tibble(nameTable = spec$table, dataTable = list(df_form)))
    }

    # Drop tables whose parser returned the empty fallback.
    all_tables %>%
      mutate(countCols = dataTable %>% map_dbl(ncol)) %>%
      filter(countCols > 0) %>%
      select(-countCols)
  }


# filing_stream -----------------------------------------------------------

.get_most_recent_rf_id <-
  function(url = "http://rankandfiled.com/data/latest") {
    # Read the latest-filings feed and report the largest filing id in it.
    latest_feed <- jsonlite::fromJSON(url)
    filing_ids <- as.numeric(latest_feed$filings$id)
    max(filing_ids)
  }


# Parse one Rank and Filed "latest filings" JSON feed into a flat data frame.
#
# url: feed endpoint, read with jsonlite::fromJSON().
# nest_data: when TRUE, the per-row ownership, offering and trade details are
#   nested into list-columns (dataCompaniesOwns, dataOfferings, dataTrades).
# return_message: when TRUE, prints "Parsed: <url>" at the end.
#
# Returns general_df with idCIK and nameEntity first, followed by the name
# columns and the id columns.
.parse_filing_stream <-
  function(url = "http://rankandfiled.com/data/latest?group=ALL&filer=All",
           nest_data = TRUE,
           return_message = TRUE) {
    json_data <-
      url %>%
      jsonlite::fromJSON()

    # NOTE(review): this changes the session-wide scipen option and never
    # restores it. Callers in this file paste numeric ids into URLs afterwards
    # and may rely on the setting -- confirm before scoping it with on.exit().
    options(scipen = 9999)

    # Class of every column of json_data$filings, with its position.
    filing_class_df <-
      json_data$filings %>% future_map_dfr(class) %>%
      gather(column, type) %>%
      mutate(idName = 1:n())

    # Keep (by position) only the columns that are not lists or data frames;
    # the nested ones (filer, offerings, trades) are handled further down.
    general_df <-
      json_data$filings %>%
      select(filing_class_df %>%
               filter(!type %in% c('list', 'data.frame')) %>%
               .$idName)

    # str_replace() removes only the first "|" found in each value.
    general_df <-
      general_df %>%
      as_tibble() %>%
      mutate_all(funs(. %>% str_replace('\\|', '')))

    general_df <-
      general_df %>%
      .resolve_name_df() %>%
      distinct()

    # Convert columns by name pattern (datetimes, filing dates, numeric ids,
    # logical flags, upper-cased descriptions/types), then build the EDGAR
    # archive URL unless slugSEC is the literal "None".
    general_df <-
      general_df %>%
      mutate_at(general_df %>% select(dplyr::matches("^datetime[A-Z]")) %>% names(),
                funs(. %>% lubridate::ymd_hms())) %>%
      mutate_at(general_df %>% select(dplyr::matches("dateFiled")) %>% names(),
                funs(. %>% lubridate::ymd())) %>%
      mutate_at(general_df %>% select(dplyr::matches("idRF|idCIK")) %>% names(),
                funs(. %>% as.numeric())) %>%
      mutate_at(general_df %>% select(dplyr::matches("^is|^has")) %>% names(),
                funs(. %>% as.logical())) %>%
      mutate_at(general_df %>% select(dplyr::matches("^description|^type")) %>% names(),
                funs(. %>% stringr::str_to_upper())) %>%
      mutate(urlSEC = ifelse(
        slugSEC == "None",
        NA,
        list(
          "https://www.sec.gov/Archives/edgar/data/",
          idCIK,
          '/',
          slugSEC
        ) %>% purrr::invoke(paste0, .)
      ))

    # NOTE(review): the joined result is not assigned, so this block has no
    # effect on general_df -- confirm whether an assignment was intended.
    if ('idFormType' %in% names(general_df)) {
      general_df %>%
        left_join(dictionary_sec_filing_codes()) %>%
        suppressMessages()
    }

    has_filer <-
      'filer' %in% names(json_data$filings)

    if (has_filer) {
      filer_df <-
        json_data$filings$filer %>%
        as_tibble()

      filer_df <-
        filer_df %>%
        .resolve_name_df()

      if ('name' %in% names(filer_df)) {
        filer_df <-
          filer_df %>%
          dplyr::rename(nameLegal = name) %>%
          mutate(nameEntity = nameEntity %>% stringr::str_to_upper())
      }
      filer_df <-
        filer_df %>%
        mutate_at(filer_df %>% select(dplyr::matches("idRF|idCIK")) %>% names(),
                  funs(. %>% as.numeric())) %>%
        mutate_at(filer_df %>% select(dplyr::matches("^name|^industry|^typeFund|^details")) %>% names(),
                  funs(. %>% stringr::str_to_upper()))

      if ('detailsOwnedBy' %in% names(filer_df)) {
        filer_df <-
          filer_df %>%
          dplyr::rename(detailsOwns = detailsOwnedBy)

        # Drop the first "|" so the split below does not start with an empty
        # piece; idRow is the positional key used to join the parsed details.
        filer_df <-
          filer_df %>%
          mutate(idRow = 1:n(),
                 detailsOwns = detailsOwns %>% str_replace("\\|", ''))

        # One pass per filer row: split the "|"-delimited ownership string and
        # break each entry at ":", "_" and "#" into ticker, company, owner
        # type and ownership date.
        owns_df <-
          1:nrow(filer_df) %>%
          future_map_dfr(function(x) {
            owns <-
              filer_df$detailsOwns[[x]] %>%
              str_split("\\|") %>%
              flatten_chr()

            # Entries after the first get a numeric suffix on their item name
            # so that spreading yields one wide row per filer.
            df <-
              tibble(idRow = x, owns) %>%
              tidyr::separate(owns,
                              into = c('idTickerOwns', 'owns'),
                              sep = '\\:') %>%
              tidyr::separate(owns,
                              into = c('nameCompanyOwns', 'owns'),
                              sep = '\\_') %>%
              tidyr::separate(
                owns,
                into = c('typeOwnerOwns', 'dateOwnershipOwns'),
                sep = '\\#'
              ) %>%
              mutate(countItem = 1:n() - 1) %>%
              mutate(
                nameCompanyOwns = nameCompanyOwns %>% str_to_upper(),
                idTickerOwns = idTickerOwns %>% str_to_upper()
              ) %>%
              gather(item, value, -c(idRow, countItem)) %>%
              mutate(value = ifelse(value == '', NA, value)) %>%
              mutate(item = ifelse(countItem == 0, item, item %>% paste0(countItem))) %>%
              arrange(countItem) %>%
              select(-countItem)

            # Remember the long-form order so the wide columns keep it.
            col_order <-
              c('idRow', df$item)
            df <-
              df %>%
              spread(item, value) %>%
              select(one_of(col_order))

            df <-
              df %>%
              mutate_at(df %>% select(dplyr::matches("date")) %>% names(),
                        funs(. %>% lubridate::ymd()))

            if (nest_data) {
              df <-
                df %>%
                nest(-idRow, .key = dataCompaniesOwns)
            }
            return(df)
          }) %>%
          suppressWarnings()

        filer_df <-
          filer_df %>%
          left_join(owns_df) %>%
          select(-idRow) %>%
          suppressMessages()
      }


      # Attach filer columns by row position; only columns general_df does not
      # already have are brought over, with the filer CIK renamed idCIKFiler.
      general_df <-
        general_df %>%
        mutate(idRow = 1:n()) %>%
        left_join(
          filer_df %>%
            mutate(idRow = 1:n()) %>%
            dplyr::rename(idCIKFiler = idCIK) %>%
            select(one_of(
              c('idCIKFiler', 'idRow'), names(filer_df)[!names(filer_df) %in% names(general_df)]
            ))
        ) %>%
        select(-dplyr::matches("^object|idRow")) %>%
        distinct() %>%
        suppressMessages()

    }

    has_offerings <-
      'offerings' %in% names(json_data$filings)

    if (has_offerings) {
      general_df <-
        general_df %>%
        mutate(idRow = 1:n())

      # One pass per row of general_df; rows without offering data contribute
      # only their idRow.
      offering_df <-
        1:nrow(general_df) %>%
        future_map_dfr(function(x) {
          offering <-
            json_data$filings$offerings[[x]]

          has_no_data <-
            length(offering) == 0

          if (has_no_data) {
            return(tibble(idRow = x))
          }
          has_no_rows <-
            offering %>% nrow() == 0
          if (has_no_rows) {
            return(tibble(idRow = x))
          }

          # Long form with a per-item counter: repeated items get a numeric
          # suffix so the spread below produces a single wide row.
          offering_long <-
            offering %>% .resolve_name_df() %>%
            mutate(idRow = x) %>%
            gather(item, value, -idRow) %>%
            group_by(item) %>%
            mutate(countItem = 1:n() - 1) %>%
            ungroup() %>%
            mutate(item = ifelse(countItem == 0, item, item %>% paste0(countItem))) %>%
            arrange(countItem) %>%
            select(-countItem)

          col_order <-
            offering_long$item

          offering <-
            offering_long %>%
            spread(item, value) %>%
            select(one_of(c('idRow', col_order)))

          offering <-
            offering %>%
            mutate_at(offering %>% select(dplyr::matches("^count[A-Z]|^amount")) %>% names(),
                      funs(. %>% as.numeric())) %>%
            mutate_at(offering %>% select(dplyr::matches("^date")) %>% names(),
                      funs(. %>% lubridate::ymd()))

          if (nest_data) {
            offering <-
              offering %>%
              nest(-idRow, .key = dataOfferings)
          }
          return(offering)
        }) %>%
        select(idRow, everything())

      offering_df <-
        offering_df %>%
        mutate_at(dplyr::matches("^nameIndustry"),
                  funs(. %>% str_to_upper()))

      # Any nameIndustry already on general_df is dropped before the join, so
      # a nameIndustry present afterwards comes from the offerings.
      general_df <-
        general_df %>%
        mutate(idRow = 1:n()) %>%
        select(-dplyr::matches("nameIndustry")) %>%
        left_join(offering_df) %>%
        select(-idRow) %>%
        suppressWarnings() %>%
        suppressMessages()

      if ('nameIndustry' %in% names(general_df)) {
        general_df <-
          general_df %>%
          dplyr::rename(nameIndustryOffering = nameIndustry)
      }
    }

    has_trades <-
      'trades' %in% names(json_data$filings)

    if (has_trades) {
      general_df <-
        general_df %>%
        mutate(idRow = 1:n())

      # One pass per row of general_df; rows without trade data contribute
      # only their idRow.
      trade_df <-
        1:nrow(general_df) %>%
        future_map_dfr(function(x) {
          trades <-
            json_data$filings$trades[[x]]

          has_no_data <-
            length(trades) == 0

          if (has_no_data) {
            return(tibble(idRow = x))
          }
          has_no_rows <-
            trades %>% nrow() == 0
          if (has_no_rows) {
            return(tibble(idRow = x))
          }

          trades <-
            trades %>% .resolve_name_df() %>%
            mutate(idRow = x) %>%
            dplyr::rename(idInsiderTransaction = codeTransaction)

          trades <-
            trades %>%
            mutate_at(.vars = trades %>% select(dplyr::matches("amount|count")) %>% names,
                      funs(. %>% as.numeric())) %>%
            left_join(get_insider_code_df()) %>%
            suppressWarnings() %>%
            suppressMessages()

          if ('amountPrice' %in% names(trades)) {
            if (!'isBought' %in% names(trades)) {
              trades <-
                trades %>%
                mutate(isBought = FALSE)
            }

            # NOTE(review): any non-missing isBought, including FALSE, becomes
            # TRUE here; only NA maps to FALSE -- confirm this is intended.
            # Share counts are negated for rows that end up not bought.
            trades <-
              trades %>%
              mutate(
                isBought = ifelse(isBought %>% is.na(), FALSE, TRUE),
                countShares = ifelse(isBought == T, countShares, -countShares),
                amountTransaction = countShares * amountPrice
              )
          }

          # Same long-then-wide reshaping as for offerings: repeated items get
          # a numeric suffix so each filing ends up as a single row.
          trades_long <-
            trades %>%
            gather(item, value, -idRow) %>%
            group_by(item) %>%
            mutate(countItem = 1:n() - 1) %>%
            ungroup %>%
            mutate(item = ifelse(countItem == 0, item, item %>% paste0(countItem))) %>%
            arrange(countItem) %>%
            select(-countItem)

          col_order <-
            trades_long$item

          trades <-
            trades_long %>%
            spread(item, value) %>%
            select(one_of(c('idRow', col_order)))

          trades <-
            trades %>%
            mutate_at(trades %>% select(dplyr::matches("^count[A-Z]|^amount")) %>% names(),
                      funs(. %>% as.numeric())) %>%
            mutate_at(trades %>% select(dplyr::matches("^date")) %>% names(),
                      funs(. %>% lubridate::ymd()))

          if (nest_data) {
            trades <-
              trades %>%
              nest(-idRow, .key = dataTrades)
          }
          return(trades)
        }) %>%
        select(idRow, everything())

      # Rename trade-level dateFiling* columns to dateFilingInsider*.
      names(trade_df)[names(trade_df) %>% str_detect('dateFiling')] <-
        trade_df %>% select(dplyr::matches("dateFiling")) %>% names() %>%
        str_replace_all("dateFiling", 'dateFilingInsider')


      # Trade-level idTicker* columns are dropped before joining by idRow.
      general_df <-
        general_df %>%
        mutate(idRow = 1:n()) %>%
        select(-dplyr::matches("nameIndustry")) %>%
        left_join(trade_df %>% select(-dplyr::matches("idTicker")), by = 'idRow') %>%
        select(-idRow) %>%
        suppressWarnings() %>%
        suppressMessages()
    }

    # Final tidy-up: upper-case entity names, drop object*/slug*/dateiso
    # columns and put the identifying columns first.
    general_df <-
      general_df %>%
      mutate_at(.vars = general_df %>% select(dplyr::matches("nameEntity")) %>% names(),
                funs(. %>% str_to_upper())) %>%
      suppressWarnings() %>%
      ungroup() %>%
      select(-dplyr::matches("^object[A-Z]|^slug|dateiso")) %>%
      select(idCIK,
             nameEntity,
             dplyr::matches("name"),
             dplyr::matches("id"),
             everything())


    if (return_message) {
      list("Parsed: ", url) %>%
        purrr::invoke(paste0, .) %>% cat(fill = T)
    }
    general_df
  }


# Fetch the most recent Rank and Filed filing stream for one filer type and
# one filing type.
#
# filers: one of "All", "Corporate Insider", "Companies" or
#   "Investment Company" (validated case-insensitively).
# filing_name: one of the filing group names in `filing_names` below
#   (validated case-insensitively).
# nest_data, return_message: forwarded to .parse_filing_stream();
#   return_message also controls the summary printed at the end.
#
# When both arguments are exactly "All", the feed is crawled over the latest
# 3500 ids in steps of 30 and failed pages are skipped; otherwise a single
# filtered request is made. Returns the parsed data with id, name and date
# columns first and a urlRankAndFiled link column added. Stops on an
# unrecognised filer or filing name.
.sec_filing_stream <-
  function(filers = 'All',
           filing_name = 'Registrations',
           nest_data = TRUE,
           return_message = TRUE) {
    # Shared tail of both branches: order the columns and add the site link.
    finalize_stream <- function(df) {
      df %>%
        select(
          idRF,
          idCIK,
          dplyr::matches("nameEntity"),
          dplyr::matches("idTicker"),
          dplyr::matches("dateFiled"),
          dplyr::matches("datetimeFiled"),
          dplyr::matches("^name"),
          dplyr::matches("^date"),
          dplyr::matches("^id"),
          dplyr::matches("^type"),
          dplyr::matches("^description"),
          everything()
        ) %>%
        mutate(
          urlRankAndFiled = paste0("http://rankandfiled.com/#/filers/", idCIK, "/filings")
        )
    }

    # Scalar condition, so use the short-circuit operator.
    both_all <-
      filers == "All" && filing_name == "All"

    if (both_all) {
      rf_id <-
        .get_most_recent_rf_id()

      rf_ids <-
        seq(rf_id - 3500, rf_id, by = 30)

      urls <-
        paste0("http://rankandfiled.com/data/latest?id=", rf_ids)

      parse_filing_stream_safe <-
        purrr::possibly(.parse_filing_stream, tibble())

      data <-
        urls %>%
        future_map_dfr(function(x) {
          parse_filing_stream_safe(
            url = x,
            nest_data = nest_data,
            return_message = return_message
          )
        }) %>%
        distinct() %>%
        finalize_stream()
    } else {
      filer_names <-
        c("All",
          "Corporate Insider",
          "Companies",
          "Investment Company") %>%
        str_to_upper()

      filing_names <-
        c(
          "Annual Reports",
          "Quarterly Reports",
          "Current Reports",
          "Other Reports",
          "Registrations",
          "Private Offerings",
          "Ownership",
          "Prospectuses",
          "Exemptions",
          "Withdrawals",
          "Correspondence",
          "Proxy Statements",
          "Confidential",
          "All"
        ) %>%
        str_to_upper()

      if (!str_to_upper(filers) %in% filer_names) {
        stop(
          paste0(
            "Filers can only be:\n",
            paste0(stringr::str_to_title(filer_names), collapse = "\n")
          )
        )
      }

      if (!str_to_upper(filing_name) %in% filing_names) {
        stop(
          paste0(
            "Filing names can only be:\n",
            paste0(stringr::str_to_title(filing_names), collapse = "\n")
          )
        )
      }

      # Query-string codes, positionally aligned with the name vectors above.
      filer_codes <-
        c("All", "insider", "company", "inv_co")

      filing_codes <-
        c(
          "A",
          "Q",
          "CR",
          "R",
          "REG",
          "REGX",
          "O",
          "P",
          "X",
          "W",
          "SEC",
          "PROXY",
          "CT",
          "ALL"
        )

      slug_filer <-
        filer_codes[match(str_to_upper(filers), filer_names)]

      slug_type <-
        filing_codes[match(str_to_upper(filing_name), filing_names)]

      mr_id <-
        .get_most_recent_rf_id()

      url_json <-
        paste0(
          "http://rankandfiled.com/data/latest?id=",
          mr_id,
          "&group=",
          slug_type,
          "&filer=",
          slug_filer
        )

      # Forward the caller's options; the parser defaults were used before,
      # which silently ignored nest_data and return_message.
      data <-
        url_json %>%
        .parse_filing_stream(nest_data = nest_data,
                             return_message = return_message) %>%
        finalize_stream()
    }

    if (return_message) {
      cat(
        paste0(
          "\nParsed Most Recent filings for ",
          filers,
          " Filers\n",
          filing_name,
          " Form Type\n"
        ),
        fill = TRUE
      )
    }

    data
  }


#' SEC filing stream
#'
#' This function returns the most recent SEC filings
#' for the specified filer types by filing types. Every combination of
#' \code{filers} and \code{filing_names} is requested; combinations that fail
#' are skipped.
#'
#' @param filers type of filer; by default every type is queried \itemize{
#' \item \code{All}: all filer types
#' \item \code{Corporate Insider}: corporate insiders
#' \item \code{Companies}: companies
#' \item \code{Investment Company}: investment companies
#' }
#' @param filing_names type of filing; by default every type is queried \itemize{
#' \item \code{All}: all filing types
#' \item \code{Annual Reports}: annual report
#' \item \code{Quarterly Reports}: quarterly report
#' \item \code{Current Reports}: current report
#' \item \code{Other Reports}: other reports
#' \item \code{Registrations}: securities registration
#' \item \code{Private Offerings}: private securities offerings
#' \item \code{Ownership}: ownership
#' \item \code{Prospectuses}: securities prospectus
#' \item \code{Exemptions}: exempt securities
#' \item \code{Withdrawals}: securities withdrawals
#' \item \code{Correspondence}: SEC correspondence
#' \item \code{Proxy Statements}:  proxy issuances
#' \item \code{Confidential}: confidential information
#' }
#' @param nest_data if \code{TRUE}, per-filing ownership, offering and trade
#' details are nested into list-columns
#' @param return_message return a message
#' @import dplyr tidyr purrr stringr formattable readr lubridate
#' @importFrom jsonlite fromJSON
#' @return a data frame with one row per \code{idRF}; an empty data frame
#' when no request returned data
#' @export
#'
#' @examples
#' \dontrun{
#' sec_filing_streams_rf(filers = 'All', filing_names = 'Annual Reports')
#' }
#'
sec_filing_streams_rf <-
  function(filers = c('All', 'Corporate Insider', 'Companies', 'Investment Company'),
           filing_names = c(
             'All',
             'Annual Reports',
             'Quarterly Reports',
             'Current Reports',
             'Other Reports',
             'Registrations',
             'Private Offerings',
             'Ownership',
             'Prospectuses',
             'Exemptions',
             'Withdrawals',
             'Correspondence',
             'Proxy Statements',
             'Confidential'
           ),
           nest_data = TRUE,
           return_message = TRUE) {
    # One request per filer type / filing type combination.
    type_df <-
      expand.grid(
        nameFiler = filers,
        nameFiling = filing_names,
        stringsAsFactors = FALSE
      ) %>%
      as_tibble()

    sec_filing_stream_safe <-
      purrr::possibly(.sec_filing_stream, NULL)

    all_data <-
      seq_len(nrow(type_df)) %>%
      future_map_dfr(function(x) {
        sec_filing_stream_safe(
          filers = type_df$nameFiler[[x]],
          filing_name = type_df$nameFiling[[x]],
          nest_data = nest_data,
          return_message = return_message
        )
      })

    # Every request failed (or none was made): there is no idRF to
    # de-duplicate on, so return the empty result as is.
    if (!"idRF" %in% names(all_data)) {
      return(all_data)
    }

    # The same filing can come back from several combinations; keep the first
    # occurrence of each idRF.
    all_data <-
      all_data %>%
      distinct() %>%
      mutate(idRow = row_number()) %>%
      group_by(idRF) %>%
      filter(idRow == min(idRow)) %>%
      ungroup() %>%
      select(-idRow)

    upper_cols <-
      all_data %>%
      select(dplyr::matches("^name|^description|^industry|^typeEntity")) %>%
      names()

    all_data %>%
      select(-dplyr::matches("dateiso")) %>%
      mutate_at(upper_cols, stringr::str_to_upper)
  }


# publics -----------------------------------------------------------------
.generate_ticker_general_url <-
  function(ticker = "FB") {
    # Interpolate the ticker into the Rank and Filed company "general"
    # endpoint and return it as a plain character vector.
    company_url <- glue("http://rankandfiled.com/data/company/{ticker}/general")
    as.character(company_url)
  }


.parse_json_public_general <-
  function(url = "http://rankandfiled.com/data/company/BX/general",
           nest_data = TRUE,
           return_message = TRUE) {
    if (!url %>% httr::url_ok()) {
      return(tibble())
    }

    json_data <-
      url %>%
      jsonlite::fromJSON()

    ticker <-
      url %>% str_replace("http://rankandfiled.com/data/company/", '') %>%
      str_split('\\/') %>% flatten_chr() %>%
      .[[1]]

    general_class_df <-
      json_data %>% future_map_dfr(class) %>%
      gather(column, type) %>%
      mutate(idName = 1:n())

    general_df <-
      json_data[general_class_df %>%
                  filter(!type %in% c('list', 'data.frame')) %>%
                  .$idName] %>%
      flatten_df() %>%
      .resolve_name_df() %>%
      select(-dplyr::matches("idTicker")) %>%
      mutate(idTicker = ticker)

    has_market <-
      'market' %in% names(json_data)

    if (has_market) {
      general_df <-
        general_df %>%
        bind_cols(json_data$market %>%
                    flatten_df() %>%
                    .resolve_name_df() %>%
                    select(-dplyr::matches("codeExchange")))

      if ('amountEquityMarketCap' %in% names(general_df)) {
        general_df <-
          general_df %>%
          mutate(amountEquityMarketCap = amountEquityMarketCap %>% formattable::currency(digits = 0))
      }

      if ('nameIndustry' %in% names(general_df)) {
        has_semi <-
          general_df$nameIndustry %>% str_detect('\\:')
        if (has_semi) {
          general_df <-
            general_df %>%
            tidyr::separate(
              nameIndustry,
              into = c('nameIndustry', 'nameSubIndustry'),
              sep = '\\: '
            ) %>%
            suppressWarnings()
        }
      }

      general_df <-
        general_df %>%
        mutate_if(is_character,
                  str_to_upper)
    }

    has_snap_shot <-
      'snapshot' %in% names(json_data)

    if (has_snap_shot) {
      snap_shot_df <-
        json_data$snapshot %>%
        as_tibble()

      if ('ebitda' %in% names(snap_shot_df)) {
        snap_shot_df <-
          snap_shot_df %>%
          mutate(digitEBITDA = ebitda %>% substr(
            start = (ebitda %>% nchar()) ,
            stop = ebitda %>% nchar()
          ))
      }

      snap_shot_df <-
        snap_shot_df %>%
        .resolve_name_df() %>%
        select(-dplyr::matches("amountEquityMarketCap"))

      if ('digitEBITDA' %in% names(snap_shot_df)) {
        snap_shot_df <-
          snap_shot_df %>%
          mutate(
            amountEBITDA = ifelse(
              digitEBITDA == "B",
              amountEBITDA * 1000000000,
              amountEBITDA * 1000000
            )
          ) %>%
          select(-digitEBITDA)
      }

      snap_shot_df <-
        snap_shot_df %>%
        mutate_at(.vars = snap_shot_df %>% select(dplyr::matches("price|amount")) %>% names,
                  funs(. %>% currency(digits = 2))) %>%
        mutate_at(.vars = snap_shot_df %>% select(dplyr::matches("amountEBITDA")) %>% names,
                  funs(. %>% currency(digits = 0)))

      general_df <-
        general_df %>%
        bind_cols(snap_shot_df)
    }

    has_filer <-
      ((
        'filer' %in% names(json_data) &
          json_data$filer %>% as_tibble() %>% ncol > 2
      ))

    if (has_filer) {
      filer_df <-
        json_data$filer %>%
        as_tibble()

      filer_df <-
        filer_df %>%
        .resolve_name_df()

      if ('detailsOwnedBy' %in% names(filer_df)) {
        filer_df <-
          filer_df %>%
          dplyr::rename(detailsOwns = detailsOwnedBy)

        filer_df <-
          filer_df %>%
          mutate(idRow = 1:n(),
                 detailsOwns = detailsOwns %>% str_replace("\\|", ''))

        owns_df <-
          1:nrow(filer_df) %>%
          future_map_dfr(function(x) {
            owns <-
              filer_df$detailsOwns[[x]] %>%
              str_split("\\|") %>%
              flatten_chr()

            df <-
              tibble(idRow = x, owns) %>%
              tidyr::separate(owns,
                              into = c('idTickerOwns', 'owns'),
                              sep = '\\:') %>%
              tidyr::separate(owns,
                              into = c('nameCompanyOwns', 'owns'),
                              sep = '\\_') %>%
              tidyr::separate(
                owns,
                into = c('typeOwnerOwns', 'dateOwnershipOwns'),
                sep = '\\#'
              ) %>%
              mutate(countItem = 1:n() - 1) %>%
              mutate(
                nameCompanyOwns = nameCompanyOwns %>% str_to_upper(),
                idTickerOwns = idTickerOwns %>% str_to_upper()
              ) %>%
              gather(item, value, -c(idRow, countItem)) %>%
              mutate(value = ifelse(value == '', NA, value)) %>%
              mutate(item = ifelse(countItem == 0, item, item %>% paste0(countItem))) %>%
              arrange(countItem) %>%
              select(-countItem)

            col_order <-
              c('idRow', df$item)
            df <-
              df %>%
              spread(item, value) %>%
              select(one_of(col_order))

            df <-
              df %>%
              mutate_at(df %>% select(dplyr::matches("date")) %>% names(),
                        funs(. %>% lubridate::ymd()))

            if (nest_data) {
              df <-
                df %>%
                nest(-idRow, .key = dataCompaniesOwns)
            }
            return(df)
          }) %>%
          suppressWarnings()

        filer_df <-
          filer_df %>%
          left_join(owns_df) %>%
          select(-idRow) %>%
          suppressMessages()
      }

      filer_df <-
        filer_df %>%
        mutate_at(filer_df %>% select(dplyr::matches("nameEntity")) %>% names(),
                  funs(. %>% stringr::str_to_upper())) %>%
        select(
          dplyr::matches("nameEntity"),
          dplyr::matches("^id"),
          dplyr::matches("industry"),
          dplyr::matches("name"),
          dplyr::matches("type"),
          everything()
        ) %>%
        select(-dplyr::matches("object"))

      if ('addressStreet1Entity' %in% names(filer_df)) {
        filer_df <-
          filer_df %>%
          mutate(
            addressEntity = list(
              addressStreet1Entity,
              ' ',
              cityEntity,
              ' ',
              stateEntity,
              ', ',
              zipcodeEntity
            ) %>% purrr::invoke(paste0, .)
          ) %>%
          select(nameEntity, addressEntity, everything())
      }

      filer_cols <-
        names(filer_df)[!names(filer_df) %in% names(general_df)]

      general_df <-
        general_df %>%
        bind_cols(filer_df %>% select(one_of(filer_cols))) %>%
        select(idCIK, dplyr::matches("nameEntity"), everything()) %>%
        select(-dplyr::matches("detailsOwns"))
    }

    if ('detailsOwns' %in% names(general_df)) {
      detail_df <-
        seq_along(general_df$detailsOwns) %>%
        future_map_dfr(function(x) {
          detail_value <-
            general_df$detailsOwns[[x]]

          if (detail_value %>% is.na()) {
            df <-
              tibble(idRow = x, nameCompanyOwns = NA)
            if (nest_data) {
              df <-
                df %>%
                nest(-idRow, .key = dataCompaniesOwns)
            }
            return(df)
          }

          values <-
            detail_value %>% str_replace('\\|', '') %>%
            str_split('\\|') %>%
            flatten_chr()

          df_data <-
            tibble(value = values) %>%
            tidyr::separate(value,
                            into = c('idTickerOwns', 'other'),
                            sep = '\\:') %>%
            tidyr::separate(other,
                            into = c('nameCompanyOwns', 'other'),
                            sep = '\\_') %>%
            tidyr::separate(other,
                            into = c('roleOwner', 'dateOwner'),
                            sep = '\\#') %>%
            mutate(nameCompanyOwns = nameCompanyOwns %>% str_to_upper(),
                   idRow = x) %>%
            gather(item, value, -idRow, na.rm = TRUE) %>%
            group_by(item) %>%
            mutate(value = ifelse(value == '', NA, value),
                   count = 1:n() - 1) %>%
            ungroup() %>%
            arrange((count)) %>%
            mutate(item = ifelse(count == 0, item, paste0(item, count))) %>%
            select(-count)

          column_order <-
            c('idRow', df_data$item)

          df_data <-
            df_data %>%
            spread(item, value) %>%
            select(one_of(column_order))

          if (nest_data) {
            df_data <-
              df_data %>%
              nest(-idRow, .key = dataCompaniesOwns)
          }
          return(df_data)
        }) %>%
        suppressWarnings()

      detail_df <-
        detail_df %>%
        mutate_at(.vars = detail_df %>% select(dplyr::matches("date")) %>% names(),
                  funs(. %>% ymd())) %>%
        suppressWarnings()

      general_df <-
        general_df %>%
        mutate(idRow = 1:n()) %>%
        select(-detailsOwns) %>%
        left_join(detail_df) %>%
        select(-idRow) %>%
        suppressMessages()
    }

    general_df <-
      general_df %>%
      mutate(
        urlTickerRankandFiled = list('http://rankandfiled.com/#/public/', idTicker, '/filings') %>% purrr::invoke(paste0, .),
        urlJSON = url
      )

    if (return_message) {
      list("Parsed: ", url) %>%
        purrr::invoke(paste0, .) %>% cat(fill = T)
    }

    return(general_df)
  }

# Fetch the general company profile for a ticker and normalise its name
# columns.
#
# @param ticker ticker symbol passed to .generate_ticker_general_url().
# @param nest_data forwarded to .parse_json_public_general().
# @param return_message forwarded to .parse_json_public_general().
# @return the parsed profile with idCIK, idTicker, nameEntity and nameCompany
#   moved to the front, any "dateiso" columns dropped, and the result passed
#   through resolve_names_to_upper().
.parse_company_general <-
  function(ticker = "FB",
           nest_data = TRUE,
           return_message = TRUE) {
    # Silence warnings only for the duration of this call; the previous
    # setting is restored on exit so the caller's session is not altered.
    old_options <- options(warn = -1)
    on.exit(options(old_options), add = TRUE)

    data <-
      .generate_ticker_general_url(ticker = ticker) %>%
      .parse_json_public_general(nest_data = nest_data,
                                 return_message = return_message)

    # When the profile carries no entity name, look it up from the filer's
    # general endpoint using the CIK.
    if (!'nameEntity' %in% names(data)) {
      df_name <-
        paste0('http://rankandfiled.com/data/filer/',
               data$idCIK,
               '/general') %>%
        .parse_json_general_filing()

      entity <-
        df_name$nameEntity

      data <-
        data %>%
        mutate(nameEntity = entity)
    }

    data %>%
      mutate(nameCompany = nameEntity) %>%
      select(idCIK, idTicker, nameEntity, nameCompany, everything()) %>%
      select(-dplyr::matches("dateiso")) %>%
      resolve_names_to_upper()
  }
# Error-tolerant variant of .parse_company_general: when the wrapped call
# fails, an empty tibble is returned instead of an error.
.parse_company_general_safe <-
  purrr::possibly(.f = .parse_company_general,
                  otherwise = tibble())
# Parse one page of insider trades from a Rank and Filed trades endpoint.
#
# @param url trades endpoint URL for a filer.
# @param return_message if TRUE, print a "Parsed: <url>" line when done.
# @return a tibble with one row per trade; an empty tibble when the URL does
#   not respond OK.
.parse_json_trades <-
  function(url = "http://rankandfiled.com/data/filer/1326801/trades?start=0",
           return_message = TRUE) {
    if (!url %>% httr::url_ok() %>% suppressWarnings()) {
      return(tibble())
    }

    json_data <-
      url %>%
      jsonlite::fromJSON()

    # Keep the number-formatting option local to this call; the previous
    # value is restored on exit.
    old_options <- options(scipen = 9999)
    on.exit(options(old_options), add = TRUE)

    trade_df <-
      json_data$trades %>%
      as_tibble() %>%
      dplyr::rename(dateTrade = date) %>%
      mutate(dateTrade = dateTrade %>% lubridate::ymd())

    # Each trade arrives as a single '*'-delimited string. X10 and X17 are
    # placeholder names for fields that are discarded.
    trade_fields <-
      c(
        "idCIKOwner",
        "idCIK",
        "idInsiderType",
        "countSharesOwned",
        "descriptionOption",
        "idTypeInsiderTransaction",
        "amountPrice",
        "countShares",
        "idInsiderTransaction",
        "X10",
        "detailOwnershipIndirect",
        "priceExcercised",
        "dateOptionExcercisable",
        "dateOptionExpiry",
        "countSharesOptions",
        "typeSecurityOption",
        "X17"
      )

    # matches() ignores case by default, so a bare "X" pattern would also
    # drop priceExcercised, dateOptionExcercisable and dateOptionExpiry.
    # Anchor the pattern and make it case-sensitive so only the placeholders
    # are removed.
    trade_df <-
      trade_df %>%
      separate(trade,
               into = trade_fields,
               sep = '\\*') %>%
      suppressWarnings() %>%
      select(-dplyr::matches("^X[0-9]+$", ignore.case = FALSE))

    date_cols <-
      trade_df %>% select(dplyr::matches("date")) %>% names()

    numeric_cols <-
      trade_df %>%
      select(dplyr::matches("idCIK|count|amount|price")) %>%
      names()

    trade_df <-
      trade_df %>%
      mutate_at(.vars = date_cols,
                .funs = lubridate::ymd) %>%
      mutate_at(.vars = numeric_cols,
                .funs = function(x) {
                  readr::parse_number(as.character(x))
                }) %>%
      left_join(tibble(
        idInsiderType = c("D", "ND"),
        typeInsider = c("Director", "Non-Director")
      )) %>%
      left_join(get_insider_code_df()) %>%
      left_join(
        tibble(
          idTypeInsiderTransaction = c("A", "D", "None"),
          typeInsiderTransaction = c('Purchase', 'Sale', 'None'),
          isBought = c(TRUE, FALSE, NA)
        )
      ) %>%
      suppressMessages()

    # Sales are stored as negative share counts so that the transaction
    # amount carries the sign of the trade.
    trade_df <-
      trade_df %>%
      mutate(
        countShares = ifelse(isBought == TRUE, countShares, -countShares),
        amountTransaction = countShares * amountPrice,
        urlJSON = url
      )

    # NA-safe: rows whose trade string had too few fields leave this column
    # NA, which previously made the `if` condition NA and raised an error.
    has_indirect_owner <-
      any(str_detect(trade_df$detailOwnershipIndirect, "By"), na.rm = TRUE)

    if (has_indirect_owner) {
      trade_df <-
        trade_df %>%
        tidyr::separate(
          detailOwnershipIndirect,
          into = c('remove', "nameOwnerIndirect"),
          remove = FALSE,
          sep = 'By '
        ) %>%
        mutate(nameOwnerIndirect = nameOwnerIndirect %>%
                 str_trim() %>%
                 str_to_upper()) %>%
        select(-remove) %>%
        suppressWarnings()
    }

    if (return_message) {
      cat(paste0("Parsed: ", url), fill = TRUE)
    }

    trade_df
  }

# Collect every page of insider trades for a ticker and attach owner names.
#
# @param ticker ticker symbol.
# @param nest_data forwarded to the general-profile and owners parsers.
# @param return_message if TRUE, print progress messages; now also forwarded
#   to the general-profile lookup so FALSE silences it too.
# @return a tibble of trades with identifying columns first, passed through
#   resolve_names_to_upper().
.parse_trades <-
  function(ticker = "FB",
           nest_data = TRUE,
           return_message = TRUE) {
    general_df <-
      .parse_company_general_safe(ticker = ticker,
                                  nest_data = nest_data,
                                  return_message = return_message)

    cik <-
      general_df$idCIK

    entity_name <-
      general_df$nameEntity

    trader_url <-
      paste0("http://rankandfiled.com/data/filer/", cik, '/traders')

    # NOTE(review): countTraders is used as the record count that drives
    # paging; confirm it reflects the number of trades.
    count_trades <-
      .parse_json_traders(url = trader_url) %>%
      .$countTraders %>%
      unique()

    # Pages hold 50 records; round up so a final partial page is requested
    # as well (integer division dropped it).
    count_pages <-
      ceiling(count_trades / 50)

    trade_urls <-
      paste0(
        'http://rankandfiled.com/data/filer/',
        cik,
        '/trades?start=',
        seq(0, by = 50, length.out = count_pages)
      )

    .parse_json_trades_safe <-
      purrr::possibly(.parse_json_trades, NULL)

    # Pages that fail to parse return NULL and are skipped by the row-bind.
    all_trades <-
      trade_urls %>%
      future_map_dfr(function(x) {
        .parse_json_trades_safe(url = x, return_message = return_message)
      }) %>%
      distinct() %>%
      suppressWarnings()

    owners_df <-
      paste0("http://rankandfiled.com/data/filer/", cik, '/owners') %>%
      .parse_json_owners(nest_data = nest_data)

    owner_names <-
      owners_df %>%
      select(idCIKOwner = idCIKOwned, nameEntityOwner) %>%
      distinct()

    all_trades <-
      all_trades %>%
      left_join(owner_names) %>%
      suppressMessages()

    all_trades <-
      all_trades %>%
      mutate(nameEntity = entity_name,
             idTicker = ticker) %>%
      select(idCIK,
             nameEntity,
             idTicker,
             dateTrade,
             idCIKOwner,
             nameEntityOwner,
             everything()) %>%
      suppressWarnings() %>%
      suppressMessages()

    count_cols <-
      all_trades %>% select(dplyr::matches("count")) %>% names()

    money_cols <-
      all_trades %>% select(dplyr::matches("amount|price")) %>% names()

    all_trades <-
      all_trades %>%
      mutate_at(.vars = count_cols,
                .funs = function(x) {
                  formattable::comma(x, digits = 0)
                }) %>%
      mutate_at(.vars = money_cols,
                .funs = function(x) {
                  formattable::currency(x, digits = 2)
                }) %>%
      select(idCIK:countShares, amountTransaction, everything()) %>%
      resolve_names_to_upper()

    if (return_message) {
      cat(paste0("Parsed ", all_trades %>% nrow(), ' trades for ', entity_name),
          fill = TRUE)
    }

    all_trades
  }

# Collect every page of SEC filings for a ticker.
#
# @param ticker ticker symbol.
# @param return_message if TRUE, print progress messages; now also forwarded
#   to the general-profile lookup so FALSE silences it too.
# @return a tibble of filings with idCIK, idTicker, nameEntity, nameCompany,
#   dateFiling and idRF first.
.parse_public_filings <-
  function(ticker = "FB",
           return_message = TRUE) {
    general_df <-
      .parse_company_general_safe(ticker = ticker,
                                  return_message = return_message)

    cik <-
      general_df$idCIK

    entity <-
      general_df$nameEntity

    # Pages hold 50 filings; round up so a final partial page is requested
    # as well (integer division dropped it).
    filing_pages <-
      ceiling(general_df$countFilings / 50)

    filing_urls <-
      paste0(
        'http://rankandfiled.com/data/filer/',
        cik,
        '/all?start=',
        seq(0, by = 50, length.out = filing_pages)
      )

    .parse_json_public_filers_safe <-
      purrr::possibly(.parse_json_public_filers, NULL)

    # Pages that fail to parse return NULL and are skipped by the row-bind.
    all_filings <-
      filing_urls %>%
      future_map_dfr(function(x) {
        .parse_json_public_filers_safe(url = x,
                                       return_message = return_message)
      }) %>%
      distinct() %>%
      suppressWarnings()

    all_filings <-
      all_filings %>%
      mutate(idTicker = ticker,
             nameCompany = entity,
             nameEntity = entity) %>%
      select(idCIK,
             idTicker,
             nameEntity,
             nameCompany,
             dateFiling,
             idRF,
             everything())

    if (return_message) {
      cat(paste0("Parsed ", all_filings %>% nrow(),
                 ' SEC Filings for ', entity),
          fill = TRUE)
    }

    all_filings
  }

# Assemble all requested tables for one ticker into a nested tibble.
#
# @param ticker ticker symbol; upper-cased before use.
# @param nest_data forwarded to the individual parsers.
# @param tables character vector of table names; NULL or empty selects the
#   full default list below.
# @param return_message if TRUE, print progress messages.
# @return a tibble with nameEntity, idCIK, nameTable and a list column
#   dataTable; tables with one column or fewer are dropped.
.parse_ticker_data <-
  function(ticker = "VNO",
           nest_data = TRUE,
           tables = NULL,
           return_message = TRUE) {
    if (length(tables) == 0) {
      tables <-
        c(
          'General',
          'CIK Filings',
          'Filings',
          'Private Offerings',
          'Related Parties',
          'Traders',
          'C Level',
          'MDA',
          'Owners',
          'Insider Trades',
          'Trades',
          'Subsidiaries'
        )
    }

    ticker <-
      ticker %>% str_to_upper()

    # Local error-tolerant wrappers: a failed fetch yields NULL.
    .parse_company_general_safe <-
      purrr::possibly(.parse_company_general, NULL)

    .parse_trades_safe <-
      purrr::possibly(.parse_trades, NULL)

    .parse_public_filings_safe <-
      purrr::possibly(.parse_public_filings, NULL)

    general <-
      .parse_company_general_safe(ticker = ticker,
                                  nest_data = nest_data,
                                  return_message = return_message) %>%
      suppressWarnings()

    # Look for "TRADES" inside each requested table name. The arguments were
    # previously reversed, so only a table literally named "Trades" matched
    # and "Insider Trades" on its own never did.
    has_trades <-
      any(str_detect(str_to_upper(tables), "TRADES"), na.rm = TRUE)

    trades <- NULL
    if (has_trades) {
      trades <-
        .parse_trades_safe(ticker = ticker,
                           nest_data = nest_data,
                           return_message = return_message) %>%
        suppressWarnings()
    }

    # Use a one-column placeholder when trades were not requested or the
    # fetch failed; the column-count filter below removes it. A NULL here
    # would make map_dbl(ncol) fail.
    if (is.null(trades)) {
      trades <-
        tibble(idTicker = ticker)
    }

    cik_data <-
      general$idCIK %>%
      .parse_cik_data(tables = tables,
                      nest_data = nest_data,
                      return_message = return_message)

    # The company profile fetched above takes the place of the CIK-level
    # 'General' table.
    if ('General' %in% cik_data$nameTable) {
      cik_data <-
        cik_data %>%
        filter(!nameTable == 'General')
    }

    all_data <-
      tibble(
        nameEntity = general$nameEntity,
        idCIK = general$idCIK,
        nameTable = c('Company Profile', 'Insider Trades'),
        dataTable = list(general, trades)
      ) %>%
      bind_rows(cik_data) %>%
      mutate(countCols = dataTable %>% map_dbl(ncol)) %>%
      filter(countCols > 1) %>%
      select(-countCols)

    if (return_message) {
      cat(paste0("Acquired all data for ", all_data$nameEntity %>% unique()),
          fill = TRUE)
    }

    all_data
  }

#' US Public Company snapshot
#'
#' This function returns snapshot details
#' of US public companies listed by Rank and Filed.  Information includes
#' corporate metadata, valuation metrics, and more.
#'
#' @param merge_type how to merge general information for public companies \itemize{
#' \item \code{NULL} and \code{MATCH}: only acquires metadata for unmatched batch import companies (default)
#' \item \code{ALL}: returns general information for all companies
#' }
#' @param return_message \code{TRUE} return a message after data import
#'
#' @return a \code{tibble}; any \code{merge_type} other than \code{ALL} or
#' \code{MATCH} (case-insensitive) yields \code{NULL}
#' @export
#' @import dplyr tidyr purrr stringr formattable readr lubridate
#' @importFrom jsonlite fromJSON
#' @family SEC
#' @family real-time data
#' @family Rank and Filed
#' @family entity search
#' @examples
#' \dontrun{
#' us_public_companies(merge_type = NULL)
#'
#' }
us_public_companies <-
  function(merge_type = NULL,
           return_message = TRUE) {
    # A NULL merge_type selects the default 'MATCH' strategy.
    if (is.null(merge_type)) {
      merge_type <-
        'MATCH'
    }

    json_data <-
      "http://rankandfiled.com/data/public_companies" %>%
      jsonlite::fromJSON()

    # The payload is split into records on '|' and into nine fields on '*';
    # the last five fields are parsed as numbers.
    company_data <-
      tibble(df = json_data$result$data %>%
               str_split(pattern = '\\|') %>%
               flatten_chr()) %>%
      tidyr::separate(
        df,
        sep = '\\*',
        into = c("X1", "X2", "X3", "X4", "X5", "X6", "X7", "X8", "X9")
      ) %>%
      mutate_at(
        .vars = c("X5", "X6", "X7", "X8", "X9"),
        .funs = function(x) {
          readr::parse_number(as.character(x))
        }
      ) %>%
      purrr::set_names(
        c(
          'idTicker',
          'idExchange',
          'codeLocationBusiness',
          'codeLocationIncorporation',
          'idSector',
          'amountEquityMarketCap',
          'priceOpen',
          'price52WeekLow',
          'price52WeekHigh'
        )
      ) %>%
      left_join(tibble(
        idSector = 1:12,
        nameSector = c(
          'Finance',
          'Capital Goods',
          'Technology',
          'Transportation',
          'Consumer Services',
          'Health Care',
          'Consumer Durables',
          'Public Utilities',
          'Miscellaneous',
          'Basic Industries',
          'Energy',
          'Consumer Non Durables'
        )
      )) %>%
      suppressMessages() %>%
      left_join(tibble(
        idExchange = c('N', 'Q', 'A'),
        nameExchange = c('NYSE', 'NASDAQ', 'NYSE ARCA')
      )) %>%
      mutate(
        # NOTE(review): BRK-A's market cap is scaled by 1e11; the reason is
        # not evident from this file -- confirm against the data source.
        amountEquityMarketCap = ifelse(
          idTicker == 'BRK-A',
          amountEquityMarketCap * 100000000000,
          amountEquityMarketCap
        ),
        # A blank business location falls back to the incorporation code.
        codeLocationBusiness = ifelse(
          codeLocationBusiness == '',
          codeLocationIncorporation,
          codeLocationBusiness
        ),
        codeLocationIncorporation = ifelse(codeLocationIncorporation == '',
                                           NA,
                                           codeLocationIncorporation),
        countSharesOutstanding = ifelse(priceOpen > 0,
                                        amountEquityMarketCap / priceOpen,
                                        NA),
        pct52WeekHigh = ifelse(priceOpen > 0,
                               priceOpen / price52WeekHigh,
                               NA),
        pct52WeekLow = ifelse(priceOpen > 0,
                              priceOpen / price52WeekLow,
                              NA),
        urlTickerRankandFiled = paste0('http://rankandfiled.com/#/public/',
                                       idTicker,
                                       '/filings')
      ) %>%
      select(idTicker:idSector, nameSector, everything()) %>%
      suppressMessages()

    countries <-
      location_codes()

    # Attach location names for both the business and incorporation codes.
    company_data <-
      company_data %>%
      left_join(
        countries %>%
          dplyr::rename(
            codeLocationBusiness = codeLocation,
            nameLocationBusiness = nameLocation
          )
      ) %>%
      left_join(
        countries %>%
          dplyr::rename(
            codeLocationIncorporation = codeLocation,
            nameLocationIncorporation = nameLocation
          )
      ) %>%
      suppressMessages()

    # Keep priced rows only and, per ticker/sector, the row(s) with the
    # largest market cap.
    company_data <-
      company_data %>%
      filter(priceOpen > 0) %>%
      filter(!priceOpen %>% is.na()) %>%
      group_by(idTicker, nameSector) %>%
      filter(amountEquityMarketCap == max(amountEquityMarketCap, na.rm = TRUE)) %>%
      ungroup() %>%
      arrange(idTicker)

    # Tickers that still appear more than once are disambiguated using the
    # ticker's own general profile (state and sector).
    ticker_count_df <-
      company_data %>%
      count(idTicker, sort = TRUE)

    fine_tickers <-
      ticker_count_df %>% filter(n < 2) %>% .$idTicker

    fine_df <-
      company_data %>%
      filter(idTicker %in% (fine_tickers))

    dup_count_df <-
      ticker_count_df %>% filter(n > 1)

    dup_df <-
      company_data %>%
      filter(idTicker %in% dup_count_df$idTicker) %>%
      arrange(idTicker)

    dup_general_df <-
      dup_count_df$idTicker %>%
      future_map_dfr(function(x) {
        .parse_company_general_safe(ticker = x)
      }) %>%
      arrange(idTicker)

    dup_df <-
      dup_general_df %>%
      select(idTicker, nameLocationBusiness = stateEntity, nameSector) %>%
      left_join(countries %>% dplyr::rename(nameLocationBusiness = nameLocation)) %>%
      dplyr::rename(codeLocationBusiness = codeLocation) %>%
      left_join(dup_df) %>%
      suppressMessages()

    company_data <-
      fine_df %>%
      bind_rows(dup_df) %>%
      arrange(idTicker)

    is_merge_all <-
      merge_type %>% str_to_upper() == 'ALL'

    is_match <-
      merge_type %>% str_to_upper() == 'MATCH'

    if (is_merge_all) {
      # Fetch the general profile for every ticker and join it on.
      general_data <-
        company_data$idTicker %>%
        unique() %>%
        future_map_dfr(function(x) {
          .parse_company_general_safe(ticker = x, return_message = return_message)
        }) %>%
        suppressWarnings()

      company_data <-
        company_data %>%
        inner_join(general_data %>%
                     select(-one_of(
                       c(
                         "idExchange",
                         "nameSector",
                         "amountEquityMarketCap",
                         "priceOpen",
                         "price52WeekLow",
                         "price52WeekHigh",
                         "urlTickerRankandFiled"
                       )
                     ))) %>%
        dplyr::rename(nameCompany = nameEntity) %>%
        select(idTicker,
               nameCompany,
               idCIK,
               idSector,
               nameSector,
               nameExchange,
               everything())

      if (return_message) {
        cat(
          paste0(
            "Acquired data for ",
            company_data %>% nrow() %>% formattable::comma(digits = 0),
            ' US stocks with a combined market capitalization of ',
            company_data$amountEquityMarketCap %>%
              sum(na.rm = TRUE) %>%
              formattable::currency(digits = 0)
          ),
          fill = TRUE
        )
      }

      return(company_data)
    }

    if (is_match) {
      all_tickers <-
        rf_us_tickers()

      # Join company name, CIK and SIC details from the ticker list.
      company_data <-
        company_data %>%
        left_join(
          all_tickers %>%
            filter(!urlTickerRankandFiled %>% is.na()) %>%
            select(
              idTicker,
              idCIK,
              nameCompany,
              codeLocationBusiness,
              idSIC,
              classificationSIC
            )
        ) %>%
        suppressMessages()

      count_df <-
        company_data %>%
        count(idTicker, sort = TRUE)

      dup_tickers <-
        count_df %>%
        filter(n > 1) %>%
        .$idTicker %>%
        unique()

      fine_df <-
        company_data %>%
        filter(!idTicker %in% dup_tickers)

      dup_df <-
        company_data %>%
        filter(idTicker %in% dup_tickers)

      # The fallback must be an empty tibble value. Passing the `tibble`
      # function itself made a failed lookup return a function object that
      # cannot be row-bound.
      .parse_company_general_safe <-
        purrr::possibly(.parse_company_general, tibble())

      dup_general_df <-
        dup_tickers %>%
        future_map_dfr(function(x) {
          .parse_company_general_safe(ticker = x)
        }) %>%
        arrange(idTicker) %>%
        suppressWarnings()

      # Duplicated tickers take their name and CIK from the general profile
      # and their SIC details from the ticker list.
      dup_df <-
        dup_df %>%
        select(-c(nameCompany, idCIK, idSIC, classificationSIC)) %>%
        distinct() %>%
        left_join(dup_general_df %>%
                    select(idTicker, idCIK, nameCompany = nameEntity)) %>%
        left_join(all_tickers %>%
                    select(idCIK, idSIC, classificationSIC)) %>%
        suppressWarnings() %>%
        suppressMessages()

      company_data <-
        fine_df %>%
        bind_rows(dup_df) %>%
        distinct()

      match_df <-
        company_data %>%
        filter(!nameCompany %>% is.na())

      # Rows still lacking a name are matched on ticker alone, then run
      # through the same duplicate handling.
      missing_name_df <-
        company_data %>%
        filter(nameCompany %>% is.na()) %>%
        select(-c(nameCompany, idCIK)) %>%
        inner_join(all_tickers %>% select(idTicker, nameCompany, idCIK)) %>%
        suppressMessages()

      count_df <-
        missing_name_df %>%
        count(idTicker, sort = TRUE)

      dup_tickers <-
        count_df %>%
        filter(n > 1) %>%
        .$idTicker %>%
        unique()

      fine_df <-
        missing_name_df %>%
        filter(!idTicker %in% dup_tickers)

      dup_df <-
        missing_name_df %>%
        filter(idTicker %in% dup_tickers)

      dup_general_df <-
        dup_tickers %>%
        future_map_dfr(function(x) {
          .parse_company_general_safe(ticker = x)
        }) %>%
        arrange(idTicker) %>%
        suppressWarnings()

      dup_df <-
        dup_df %>%
        select(-c(nameCompany, idCIK)) %>%
        distinct() %>%
        left_join(dup_general_df %>% select(idTicker, nameCompany = nameEntity, idCIK)) %>%
        suppressMessages()

      missing_name_df <-
        fine_df %>%
        bind_rows(dup_df) %>%
        select(-c(idSIC, classificationSIC)) %>%
        left_join(all_tickers %>%
                    select(idCIK, idSIC, classificationSIC)) %>%
        suppressMessages()

      company_data <-
        match_df %>%
        bind_rows(missing_name_df) %>%
        arrange(desc(amountEquityMarketCap)) %>%
        select(
          idCIK,
          idTicker,
          nameCompany,
          idExchange,
          idSector,
          nameSector,
          idSIC,
          classificationSIC,
          everything()
        )

      # Apply display formatting to price, amount and percentage columns.
      price_cols <-
        company_data %>% select(dplyr::matches("price")) %>% names()

      amount_cols <-
        company_data %>% select(dplyr::matches("amount")) %>% names()

      pct_cols <-
        company_data %>% select(dplyr::matches("pct")) %>% names()

      company_data <-
        company_data %>%
        mutate_at(price_cols,
                  .funs = function(x) {
                    formattable::currency(x, digits = 2)
                  }) %>%
        mutate_at(amount_cols,
                  .funs = function(x) {
                    formattable::currency(x, digits = 0)
                  }) %>%
        mutate_at(pct_cols,
                  .funs = function(x) {
                    formattable::percent(x, digits = 2)
                  })

      if (return_message) {
        cat(
          paste0(
            "Acquired data for ",
            company_data %>% nrow() %>% formattable::comma(digits = 0),
            ' US Stocks with a combined market capitalization of ',
            company_data$amountEquityMarketCap %>%
              sum(na.rm = TRUE) %>%
              formattable::currency(digits = 0)
          ),
          fill = TRUE
        )
      }

      company_data <-
        company_data %>%
        resolve_names_to_upper()

      return(company_data)

    }
  }

# SEC - Subsidiary --------------------------------------------------------

# Extract the numeric CIK from an SEC EDGAR archive URL.
#
# @param url character; an EDGAR archive URL. Only the first element
#   contributes to the result.
# @return the first path segment after ".../Archives/edgar/data/" as a
#   number; NA (with a coercion warning) when that segment is not numeric.
.parse_sec_url_for_cik <-
  function(url) {
    # Strip the archive prefix for either scheme. Dots are escaped so they
    # match literal dots only.
    path <-
      gsub("https?://www\\.sec\\.gov/Archives/edgar/data/", "", url)

    # The CIK is everything before the first remaining slash.
    first_segment <-
      sub("/.*$", "", path)

    as.numeric(first_segment[[1]])
  }

# Reference table of upper-case location names.
#
# Returns a one-column tibble (nameLocation) built from three groups kept in
# their original order: country and territory names, US state names
# (including the District of Columbia), and a handful of alternate names.
.get_loc_df <-
  function() {
    world_names <- c(
      "AFGHANISTAN", "ALAND ISLANDS", "ALBANIA", "ALGERIA",
      "AMERICAN SAMOA", "ANDORRA", "ANGOLA", "ANGUILLA", "ANTARCTICA",
      "ANTIGUA AND BARBUDA", "ARGENTINA", "ARMENIA", "ARUBA", "AUSTRALIA",
      "AUSTRIA", "AUSTRIA-HUNGARY", "AZERBAIJAN", "BADEN", "BAHAMAS",
      "BAHRAIN", "BANGLADESH", "BARBADOS", "BAVARIA", "BELARUS", "BELGIUM",
      "BELIZE", "BENIN", "BERMUDA", "BHUTAN",
      "BOLIVIA, PLURINATIONAL STATE OF",
      "BONAIRE, SINT EUSTATIUS AND SABA",
      "BOSNIA AND HERZEGOVINA", "BOTSWANA", "BOUVET ISLAND", "BRAZIL",
      "BRITISH INDIAN OCEAN TERRITORY", "BRUNEI DARUSSALAM", "BULGARIA",
      "BURKINA FASO", "BURUNDI", "CAMBODIA", "CAMEROON", "CANADA",
      "CABO VERDE", "CAYMAN ISLANDS", "CENTRAL AFRICAN REPUBLIC", "CHAD",
      "CHILE", "CHINA", "CHRISTMAS ISLAND", "COCOS (KEELING) ISLANDS",
      "COLOMBIA", "COMOROS",
      "CONGO, THE DEMOCRATIC REPUBLIC OF THE",
      "CONGO", "COOK ISLANDS", "COSTA RICA", "COTE D'IVOIRE", "CROATIA",
      "CUBA", "CURACAO", "CYPRUS", "CZECH REPUBLIC", "CZECHOSLOVAKIA",
      "DENMARK", "DJIBOUTI", "DOMINICA", "DOMINICAN REPUBLIC", "ECUADOR",
      "EGYPT", "EL SALVADOR", "EQUATORIAL GUINEA", "ERITREA", "ESTONIA",
      "ETHIOPIA", "FALKLAND ISLANDS (MALVINAS)", "FAROE ISLANDS", "FIJI",
      "FINLAND", "FRANCE", "FRENCH GUIANA", "FRENCH POLYNESIA",
      "FRENCH SOUTHERN TERRITORIES", "GABON", "GAMBIA", "GEORGIA",
      "GERMAN DEMOCRATIC REPUBLIC", "FEDERAL REPUBLIC OF GERMANY",
      "GERMANY", "GHANA", "GIBRALTAR", "GREECE", "GREENLAND", "GRENADA",
      "GUADELOUPE", "GUAM", "GUATEMALA", "GUERNSEY", "GUINEA",
      "GUINEA-BISSAU", "GUYANA", "HAITI", "HANOVER",
      "HEARD ISLAND AND MCDONALD ISLANDS", "HESSE ELECTORAL",
      "HESSE GRAND DUCAL", "HOLY SEE (VATICAN CITY STATE)", "HONDURAS",
      "HONG KONG", "HUNGARY", "ICELAND", "INDIA", "INDONESIA",
      "IRAN, ISLAMIC REPUBLIC OF", "IRAQ", "IRELAND", "ISLE OF MAN",
      "ISRAEL", "ITALY", "JAMAICA", "JAPAN", "JERSEY", "JORDAN",
      "KAZAKHSTAN", "KENYA", "KIRIBATI", "KOREA",
      "KOREA, DEMOCRATIC PEOPLE'S REPUBLIC OF", "KOREA, REPUBLIC OF",
      "KOSOVO", "KUWAIT", "KYRGYZSTAN",
      "LAO PEOPLE'S DEMOCRATIC REPUBLIC", "LATVIA", "LEBANON", "LESOTHO",
      "LIBERIA", "LIBYA", "LIECHTENSTEIN", "LITHUANIA", "LUXEMBOURG",
      "MACAO", "MACEDONIA, THE FORMER YUGOSLAV REPUBLIC OF", "MADAGASCAR",
      "MALAWI", "MALAYSIA", "MALDIVES", "MALI", "MALTA",
      "MARSHALL ISLANDS", "MARTINIQUE", "MAURITANIA", "MAURITIUS",
      "MAYOTTE", "MECKLENBURG SCHWERIN", "MEXICO",
      "MICRONESIA, FEDERATED STATES OF", "MODENA", "MOLDOVA, REPUBLIC OF",
      "MONACO", "MONGOLIA", "MONTENEGRO", "MONTSERRAT", "MOROCCO",
      "MOZAMBIQUE", "MYANMAR", "NAMIBIA", "NAURU", "NEPAL", "NETHERLANDS",
      "NETHERLANDS ANTILLES", "NEW CALEDONIA", "NEW ZEALAND", "NICARAGUA",
      "NIGER", "NIGERIA", "NIUE", "NORFOLK ISLAND",
      "NORTHERN MARIANA ISLANDS", "NORWAY", "OMAN", "PAKISTAN", "PALAU",
      "PALESTINE, STATE OF", "PANAMA", "PAPUA NEW GUINEA", "PARAGUAY",
      "PARMA", "PERU", "PHILIPPINES", "PITCAIRN", "POLAND", "PORTUGAL",
      "PUERTO RICO", "QATAR", "REPUBLIC OF VIETNAM", "REUNION", "ROMANIA",
      "RUSSIAN FEDERATION", "RWANDA", "SAINT BARTHELEMY",
      "SAINT HELENA, ASCENSION AND TRISTAN DA CUNHA",
      "SAINT KITTS AND NEVIS", "SAINT LUCIA",
      "SAINT MARTIN (FRENCH PART)", "SAINT PIERRE AND MIQUELON",
      "SAINT VINCENT AND THE GRENADINES", "SAMOA", "SAN MARINO",
      "SAO TOME AND PRINCIPE", "SAUDI ARABIA", "SAXONY", "SENEGAL",
      "SERBIA", "SEYCHELLES", "SIERRA LEONE", "SINGAPORE",
      "SINT MAARTEN (DUTCH PART)", "SLOVAKIA", "SLOVENIA",
      "SOLOMON ISLANDS", "SOMALIA", "SOUTH AFRICA",
      "SOUTH GEORGIA AND THE SOUTH SANDWICH ISLANDS", "SOUTH SUDAN",
      "SPAIN", "SRI LANKA", "SUDAN", "SURINAME", "SVALBARD AND JAN MAYEN",
      "SWAZILAND", "SWEDEN", "SWITZERLAND", "SYRIAN ARAB REPUBLIC",
      "TAIWAN, PROVINCE OF CHINA", "TAJIKISTAN",
      "TANZANIA, UNITED REPUBLIC OF", "THAILAND", "TIMOR-LESTE", "TOGO",
      "TOKELAU", "TONGA", "TRINIDAD AND TOBAGO", "TUNISIA", "TURKEY",
      "TURKMENISTAN", "TURKS AND CAICOS ISLANDS", "TUSCANY", "TUVALU",
      "TWO SICILIES", "UGANDA", "UKRAINE", "UNITED ARAB EMIRATES",
      "UNITED KINGDOM", "UNITED STATES",
      "UNITED STATES MINOR OUTLYING ISLANDS", "URUGUAY", "UZBEKISTAN",
      "VANUATU", "VENEZUELA, BOLIVARIAN REPUBLIC OF", "VIET NAM",
      "VIRGIN ISLANDS, BRITISH", "VIRGIN ISLANDS, U.S.",
      "WALLIS AND FUTUNA", "WESTERN SAHARA", "WUERTTEMBURG", "YEMEN",
      "YEMEN ARAB REPUBLIC", "YEMEN PEOPLE'S REPUBLIC", "YUGOSLAVIA",
      "ZAMBIA", "ZANZIBAR", "ZIMBABWE"
    )

    us_state_names <- c(
      "ALABAMA", "ALASKA", "ARIZONA", "ARKANSAS", "CALIFORNIA", "COLORADO",
      "CONNECTICUT", "DELAWARE", "FLORIDA", "GEORGIA", "HAWAII", "IDAHO",
      "ILLINOIS", "INDIANA", "IOWA", "KANSAS", "KENTUCKY", "LOUISIANA",
      "MAINE", "MARYLAND", "MASSACHUSETTS", "MICHIGAN", "MINNESOTA",
      "MISSISSIPPI", "MISSOURI", "MONTANA", "NEBRASKA", "NEVADA",
      "NEW HAMPSHIRE", "NEW JERSEY", "NEW MEXICO", "NEW YORK",
      "NORTH CAROLINA", "NORTH DAKOTA", "OHIO", "OKLAHOMA", "OREGON",
      "PENNSYLVANIA", "RHODE ISLAND", "SOUTH CAROLINA", "SOUTH DAKOTA",
      "TENNESSEE", "TEXAS", "UTAH", "VERMONT", "VIRGINIA", "WASHINGTON",
      "WEST VIRGINIA", "WISCONSIN", "WYOMING", "DISTRICT OF COLUMBIA"
    )

    alternate_names <- c(
      "ENGLAND", "BRITISH VIRGIN ISLANDS", "NETHERLAND ANTILLES", "RUSSIA",
      "SOUTH KOREA", "TAIWAN", "VENEZUELA", "CHANNEL ISLANDS"
    )

    tibble(nameLocation = c(world_names, us_state_names, alternate_names))
  }

# Parse an exhibit page whose table cells carry several kinds of items
# (subsidiary names in `td div` nodes; locations and ownership percentages in
# plain `td` nodes) into one row per subsidiary.
#
# page: parsed HTML document to read the `td` / `td div` nodes from.
#
# Returns a tibble with `nameSubsidiary` (upper-cased),
# `nameLocationSubsidiary` (cells that match a known location name) and
# `pctSubsidiaryOwned` (numeric cell value divided by 100).
.parse_page_sub_multi_item_html <-
  function(page) {
    locations <-
      .get_loc_df() %>%
      .$nameLocation

    subsidiaries <-
      page %>%
      html_nodes('td div') %>%
      html_text() %>%
      str_replace_all('\u0095 |\u0096|\u0095\n', '') %>%
      str_trim()

    subsidiaries <-
      subsidiaries[!subsidiaries == '']

    data_nodes <-
      page %>%
      html_nodes('td') %>%
      html_text() %>%
      str_replace_all('\u0095 |\u0096|\u0095\n', '') %>%
      str_trim() %>%
      str_to_upper()

    data_nodes <-
      data_nodes[!data_nodes == '']

    # Cells whose (upper-cased) text is exactly a known location name.
    location_items <-
      data_nodes[data_nodes %in% locations]

    # Numeric cells are taken as ownership percentages. The whole conversion
    # is evaluated inside the suppressors: `%>%` binds tighter than `/`, so
    # writing `... / 100 %>% suppressWarnings()` would only wrap the literal
    # 100 and let the as.numeric() coercion warnings through.
    pct_vals <-
      suppressMessages(suppressWarnings(
        tibble(value = data_nodes) %>%
          filter(!value %>% str_detect("\\([(1-9)]\\)")) %>%
          mutate(pctSubsidiaryOwned = value %>% as.numeric()) %>%
          filter(!pctSubsidiaryOwned %>% is.na()) %>%
          slice(seq_along(subsidiaries)) %>%
          .$pctSubsidiaryOwned
      )) / 100

    all_data <-
      tibble(
        nameSubsidiary = subsidiaries,
        nameLocationSubsidiary = location_items,
        pctSubsidiaryOwned = pct_vals
      ) %>%
      mutate(nameSubsidiary = nameSubsidiary %>% str_to_upper())

    return(all_data)
  }

# Parse an exhibit page laid out as an HTML table into a subsidiary tibble.
#
# page:      parsed HTML document.
# numbers:   column positions probed with the CSS selector
#            `td:nth-child(<n>)`; each becomes an item "X<n>".
# hit_terms: header / caption fragments; cells matching any of them (or any
#            bold text found on the page) are discarded as non-data.
#
# Returns a tibble with `nameSubsidiary` and, when the matching columns could
# be identified, `locationOrganizationSubsidiary` and `pctSubsidiaryOwned`.
# Remaining positional `X<n>` columns are dropped. The caller is expected to
# add filing-level identifiers (CIK, URL).
.parse_page_subsidiary_table_html <-
  function(page,
           numbers = 1:10,
           hit_terms = c(
             "Organized",
             "STATE OR|STATE OF|JURISDICTION OF|JURISDICTION OF INCORPORATION OR ORGANIZATION|JURISDICTION|JURISDICTION OF INCORPORATION OR\nORGANIZATION",
             "NAME|ORGANIZED UNDER THE LAWS OF",
             'STATE OF ORGANIZATION',
             'STATE OR COUNTRY OF ORGANIZATION',
             'NAME OF SUBSIDIARY',
             'NAME',
             'ENTITY NAME',
             'the laws of',
             'Percentage of voting',
             'securities owned by',
             'immediate parent',
             'CERTAIN INTERMEDIARY SUBSIDIARIES',
             'Note:',
             'Organized',
             'Under the',
             'Laws of',
             'OWNED BY',
             'IMMEDIATE',
             'PARENT',
             "OWNS",
             "CERTAIN INTERMEDIARY SUBSIDIARIES",
             'PERCENTAGE',
             'OF VOTING',
             'SECURITIES'
           )) {
    # Bold text is collected so it can be filtered out of the data cells and
    # scanned for an "as of" date.
    is_ib1 <-
      page %>%
      html_nodes('b font') %>%
      html_text() %>% length() > 0

    if (is_ib1) {
      items_bold <-
        page %>%
        html_nodes('b font') %>%
        html_text() %>%
        str_to_upper() %>%
        str_replace_all('\n', ' ')
      items_bold <-
        stringi::stri_trans_general(items_bold, "Latin-ASCII")
      items_bold <-
        items_bold %>%
        str_split('\\-') %>%
        flatten_chr() %>%
        str_trim()
    } else {
      items_bold <-
        page %>%
        html_nodes('b') %>%
        html_text() %>%
        str_to_upper() %>%
        str_replace_all('\n', ' ') %>%
        stringi::stri_trans_general("Latin-ASCII")
      items_bold <-
        items_bold %>%
        str_split('\\-') %>%
        flatten_chr() %>%
        str_trim() %>%
        unique()
    }

    # Blank bold items would add an empty alternative to the `|`-joined
    # patterns built below, and an empty alternative matches every cell.
    items_bold <-
      items_bold[!items_bold %in% '']

    month_pattern <-
      month.name %>% str_to_upper() %>% paste(collapse = '|')

    has_date <-
      items_bold %>% grep(month_pattern, .) %>% length() > 0

    if (has_date) {
      date_data <-
        items_bold[items_bold %>% grep(month_pattern, .)] %>%
        lubridate::mdy()
    } else {
      date_data <-
        NA
    }

    # One regex made of the supplied terms, the bold items and "(a)".."(z)".
    hit_terms <-
      hit_terms %>%
      append(items_bold) %>%
      str_to_upper() %>%
      unique() %>%
      append(list('(', letters, ')') %>%
               purrr::invoke(paste0, .)) %>%
      paste0(collapse = '|')


    hit_terms_in <-
      hit_terms %>% str_split('\\|') %>%
      flatten_chr()

    locations <-
      .get_loc_df() %>%
      .$nameLocation

    # Long table of (item = "X<n>", value = cell text) for every probed column.
    all_data <-
      numbers %>%
      future_map_dfr(function(x) {
        css_selector <-
          paste0('td:nth-child(', x, ')')
        has_length <-
          page %>%
          html_nodes(css_selector) %>% length() > 0
        if (has_length) {
          item <-
            paste0("X" , x)

          value <-
            page %>%
            html_nodes(css_selector) %>%
            html_text() %>%
            str_trim()
          tibble(item, value)
        }
      }) %>%
      mutate(
        value = value %>% str_to_upper() %>% str_replace_all('\n  ', ' ') %>% str_replace_all('\u0096 ', '')
      ) %>%
      filter(!value == '')

    has_loc_key <-
      all_data %>%
      filter(value %in% locations) %>%
      nrow() > 0

    # Defaults keep the later checks well-defined when no location column, or
    # more than one candidate column, is found.
    loc_cols <- character(0)
    loc_col <- NULL

    if (has_loc_key) {
      loc_cols <-
        all_data %>%
        filter(value %in% locations) %>%
        .$item %>%
        unique()
      if (length(loc_cols) == 1) {
        loc_col <-
          loc_cols[[1]]
      }
    }

    has_pct <-
      all_data %>%
      filter(value %>% str_detect("PERCENT")) %>%
      .$item %>% unique() %>% length() > 0

    if (has_pct) {
      pct_col <-
        all_data %>%
        filter(value %>% str_detect("PERCENT")) %>%
        .$item %>% unique()
    } else {
      pct_col <-
        NA
    }

    # Percent header sits in a column that also holds locations: the cells
    # carry several items each, so hand off to the multi-item parser.
    is_whack <-
      pct_col[[1]] %in% loc_cols

    if (is_whack) {
      all_data <-
        page %>%
        .parse_page_sub_multi_item_html() %>%
        mutate(dateSubsidiaryAsOf = date_data)

      return(all_data)
    }

    if (length(items_bold) > 0) {
      all_data <-
        all_data %>%
        filter(!value %in% items_bold) %>%
        filter(!value %>% str_detect(paste0(items_bold %>% unique(), collapse = '|')))
    }

    all_data <-
      all_data %>%
      filter(!value %in% hit_terms_in) %>%
      filter(!value %>% str_detect(hit_terms))

    # Cells per column, one column per item.
    count_df <-
      all_data %>% count(item, sort = TRUE) %>%
      arrange(item) %>%
      spread(item, n)

    # TRUE when the second column has exactly one more cell than the first;
    # FALSE (instead of an error) when fewer than two columns remain.
    off_one <-
      ncol(count_df) >= 2 &&
      isTRUE((count_df[, 2] %>% extract2(1)) - (count_df[, 1] %>% extract2(1)) == 1)

    min_item <-
      count_df %>% gather(item, value) %>% filter(value == min(value)) %>% .$item

    change_pct <-
      has_pct & (pct_col == min_item) %>% sum() > 0

    if (change_pct) {
      pct_col <-
        names(count_df)[[3]]
    }

    if (off_one) {
      df <-
        all_data$item %>% unique() %>%
        future_map_dfr(function(x) {
          has_data <-
            all_data %>%
            filter(item == x) %>%
            filter(!value %>% is.na()) %>%
            filter(!value == '') %>%
            nrow() > 0

          if (has_data) {
            all_data %>%
              filter(item == x) %>%
              filter(!value %>% is.na()) %>%
              filter(!value == '') %>%
              filter(!value %>% str_detect(hit_terms)) %>%
              mutate(idSubsidiary = row_number())
          }
        }) %>%
        filter(!value %>% str_detect(hit_terms)) %>%
        spread(item, value)

      if (change_pct) {
        df <-
          df %>%
          select(-one_of(min_item))
      }
    }

    if (!off_one) {
      has_property <-
        items_bold %>% str_detect('PROPERTY') %>% sum() > 0
      if (has_property) {
        # Property-style exhibits: read each HTML table and map its first row
        # onto known column names.
        tables <-
          page %>%
          html_table(fill = TRUE)
        df <-
          seq_along(tables) %>%
          future_map_dfr(function(x) {
            table_df <-
              tables[[x]] %>%
              data.frame(stringsAsFactors = FALSE) %>%
              as_tibble()

            column_df <-
              table_df %>% slice(1) %>%
              gather(column, value) %>%
              mutate(idColumn = row_number()) %>%
              filter(!value %>% is.na()) %>%
              left_join(tibble(
                value = c(
                  "PROPERTY",
                  "ENTITIES",
                  "STATE OF FORMATION",
                  "DATE OF FORMATION",
                  " ",
                  'General Information:'
                ),
                nameItem = c(
                  'nameProperty',
                  'nameSubsidiary',
                  'locationOrganizationSubsidiary',
                  'dateSubsidiaryFormed',
                  'locationOrganizationSubsidiary',
                  'nameSubsidiary'
                )
              )) %>%
              suppressMessages()
            two_col <-
              column_df %>% nrow() == 2
            if (two_col) {
              column_df$nameItem[[2]] <-
                'locationOrganizationSubsidiary'
            }

            columns_keep <-
              column_df$idColumn

            table_df <-
              table_df %>%
              select(columns_keep) %>%
              slice(-1) %>%
              purrr::set_names(column_df$nameItem)

            table_df <-
              table_df %>%
              mutate_all(funs(. %>% str_trim() %>% str_to_upper())) %>%
              mutate(nameSubsidiary = ifelse(nameSubsidiary == '', NA, nameSubsidiary)) %>%
              filter(!nameSubsidiary %>% is.na())


            if (two_col) {
              # Second column holds "<location> FORMED <date>".
              table_df <-
                table_df %>%
                tidyr::separate(
                  locationOrganizationSubsidiary,
                  into = c(
                    'locationOrganizationSubsidiary',
                    'dateSubsidiaryFormed'
                  ),
                  sep = 'FORMED'
                ) %>%
                suppressWarnings() %>%
                mutate(locationOrganizationSubsidiary = locationOrganizationSubsidiary %>% str_replace_all('\\,', '')) %>%
                mutate_all(funs(. %>% str_replace('\n', '') %>% str_trim()))
            }


            if ('nameProperty' %in% names(table_df)) {
              table_df <-
                table_df %>%
                mutate(nameProperty = ifelse(nameProperty == '', NA, nameProperty)) %>%
                mutate_all(funs(. %>% str_replace('\n|\n  |\n  ', '') %>% str_trim())) %>%
                mutate_all(funs(. %>% str_replace('\n', '') %>% str_trim())) %>%
                mutate_all(funs(. %>% str_replace('  ', ' ') %>% str_trim())) %>%
                fill(nameProperty)

            }

            return(table_df)
          })

        if ('dateSubsidiaryFormed' %in% names(df)) {
          df <-
            df %>%
            mutate(dateSubsidiaryFormed = dateSubsidiaryFormed %>% lubridate::mdy())
        }

        # No CIK / URL is available in this function, so those columns are
        # left to the caller. `\\bA ` only strips the standalone article
        # ("A DELAWARE ...") and not the trailing "A " of a name like "NEVADA ".
        df <-
          df %>%
          select(nameSubsidiary, everything()) %>%
          mutate(
            locationOrganizationSubsidiary = locationOrganizationSubsidiary %>%
              str_replace_all(
                '\\bA |LIMITED LIABILITY COMPANY|CORPORATION|LIMITED PARTNERSHIP',
                ''
              ) %>% str_trim()
          )

        return(df)
      }
      if (!has_property) {
        # Number the cells within each column, widen to one row per position,
        # then drop rows without a first-column value and any leftover header
        # cells.
        df <-
          all_data %>%
          mutate(value = ifelse(value == '', NA, value)) %>%
          filter(!value %>% is.na()) %>%
          group_by(item) %>%
          mutate(idSubsidiary = row_number()) %>%
          spread(item, value) %>%
          filter(!X1 == '') %>%
          mutate(idSubsidiary = row_number()) %>%
          gather(item, value, -c(X1, idSubsidiary)) %>%
          ungroup() %>%
          filter(!value %>% str_detect(hit_terms)) %>%
          spread(item, value)
      }

    }

    # First column is the subsidiary name; anything from the first "(" on
    # (footnote markers etc.) is discarded.
    df <-
      df %>%
      dplyr::rename(nameSubsidiary = X1) %>%
      tidyr::separate(nameSubsidiary,
                      sep = '\\(',
                      into = c('nameSubsidiary', 'remove')) %>%
      select(-dplyr::matches("remove")) %>%
      mutate(nameSubsidiary = nameSubsidiary %>% str_trim()) %>%
      suppressWarnings() %>%
      select(-dplyr::matches("idSubsidiary"))

    if (has_pct) {
      names(df)[names(df) %>% grep(pct_col, .)] <-
        'pctSubsidiaryOwned'

      df <-
        df %>%
        mutate_at(df %>% select(dplyr::matches('pct')) %>% names(),
                  funs(. %>% as.numeric() / 100)) %>%
        suppressWarnings()
    }

    # Only rename when exactly one location column was identified.
    if (has_loc_key && !is.null(loc_col)) {
      names(df)[names(df) %>% grep(loc_col, .)] <-
        'locationOrganizationSubsidiary'
    }

    df <-
      df %>%
      select(-dplyr::matches("X"))

    return(df)
  }

# Parse an HTML subsidiary exhibit into a tibble of subsidiaries.
#
# url:            address of the exhibit page.
# return_message: when TRUE, prints "Parsed: <url>" after a successful parse.
#
# Strategy, in order:
#   1. No `td` cells: read `font` nodes, either as "NAME (LOCATION)" strings
#      or as bold location headings followed by subsidiary names.
#   2. No bold text: treat `td:nth-child(n)` columns as a plain table.
#   3. Otherwise delegate to `.parse_page_subsidiary_table_html()`.
#
# Returns a tibble that includes `nameSubsidiary`, `idCIK` and `urlSEC`, plus
# location / ownership columns when they could be identified.
.parse_sec_subsidiary_url_html <-
  function(url = "https://www.sec.gov/Archives/edgar/data/34088/000003408816000065/xomexhibit21.htm",
           return_message = TRUE) {
    cik <-
      url %>%
      .parse_sec_url_for_cik()

    page <-
      url %>%
      read_html()

    is_zero <-
      page %>%
      html_nodes(paste0('td:nth-child(', 1, ')')) %>%
      length() == 0
    locations <-
      .get_loc_df() %>%
      .$nameLocation

    if (is_zero) {
      # NOTE(review): this pattern is an escaped space replaced by a space,
      # i.e. a no-op as written; possibly a non-breaking space was intended.
      data <-
        page %>%
        html_nodes('font') %>%
        html_text() %>%
        str_replace_all('\\ ', ' ')

      data <-
        data[!data == '']


      # More than a quarter of the strings contain "(": treat each such string
      # as "NAME (LOCATION)".
      is_parenth <-
        data %>% str_detect('\\(') %>% sum() / length(data) > .25

      if (is_parenth) {
        data <-
          data[data %>% str_detect('\\(')]

        df <-
          tibble(data) %>%
          separate(
            data,
            sep = '\\(',
            into = c('nameSubsidiary', 'locationOrganizationSubsidiary')
          ) %>%
          separate(
            locationOrganizationSubsidiary,
            sep = '\\)',
            into = c('locationOrganizationSubsidiary', 'remove')
          ) %>%
          select(-remove) %>%
          mutate_all(funs(. %>% str_trim() %>% str_to_upper())) %>%
          mutate(idCIK = cik, urlSEC = url) %>%
          select(-dplyr::matches("idSubsidiary"))

        if (return_message) {
          list("Parsed: ", url) %>%
            purrr::invoke(paste0, .) %>% cat(fill = TRUE)
        }

        return(df)
      }

      is_nested <-
        page %>%
        html_nodes('b font') %>%
        html_text() %>% length() > 2

      if (is_nested) {
        # Bold headings are taken as location names; each subsidiary inherits
        # the most recent heading above it.
        locations_raw <-
          page %>%
          html_nodes('b font') %>%
          html_text() %>%
          str_replace_all('\\:', '') %>%
          str_to_upper()

        locations <-
          locations_raw[!locations_raw %>% str_detect('EXHIBIT|SUBSIDIARY|SUBSIDIARIES')]

        data <-
          data[data %>% nchar() > 3] %>% str_to_upper()

        df <-
          tibble(nameSubsidiary = data) %>%
          mutate(idRow = row_number())

        # Row directly after a heading is where that location starts.
        .loc_df <-
          tibble(nameSubsidiary = locations) %>%
          inner_join(df %>% select(idRow, nameSubsidiary)) %>%
          mutate(idRow = idRow + 1) %>%
          select(locationOrganizationSubsidiary = nameSubsidiary, idRow) %>%
          suppressMessages()

        df <-
          df %>%
          filter(!nameSubsidiary %>% str_detect('SUBSIDIARY|SUBSIDIARIES')) %>%
          filter(!nameSubsidiary %>% str_detect(paste0(locations_raw, collapse = '|'))) %>%
          suppressWarnings()

        df <-
          df %>%
          left_join(.loc_df) %>%
          fill(locationOrganizationSubsidiary) %>%
          mutate(urlSEC = url, idCIK = cik) %>%
          select(idCIK,
                 nameSubsidiary,
                 locationOrganizationSubsidiary,
                 everything()) %>%
          select(-idRow) %>%
          suppressMessages() %>%
          select(-dplyr::matches("idSubsidiary"))
        if (return_message) {
          list("Parsed: ", url) %>%
            purrr::invoke(paste0, .) %>% cat(fill = TRUE)
        }

        return(df)
      }
    }

    is_font_table <-
      page %>%
      html_nodes('b') %>%
      html_text() %>% length() == 0

    if (is_font_table) {
      # Long table of (item = "X<n>", value = cell text) for columns 1..10.
      all_data <-
        1:10 %>%
        future_map_dfr(function(x) {
          css_selector <-
            paste0('td:nth-child(', x, ')')
          has_length <-
            page %>%
            html_nodes(css_selector) %>% length() > 0
          if (has_length) {
            item <-
              paste0("X" , x)

            value <-
              page %>%
              html_nodes(css_selector) %>%
              html_text() %>%
              str_trim()
            tibble(item, value)
          }
        }) %>%
        mutate(
          value = value %>% str_to_upper() %>% str_replace_all('\n  ', ' ') %>% str_replace_all('\u0096 ', '')
        ) %>%
        filter(!value == '')


      has_loc_key <-
        all_data %>%
        filter(value %in% locations) %>%
        nrow() > 0

      if (has_loc_key) {
        loc_col <-
          all_data %>%
          filter(value %in% locations) %>%
          .$item %>%
          unique()
      }

      hit_terms_in <-
        c(
          "Organized",
          "STATE OR|STATE OF|JURISDICTION OF|JURISDICTION OF INCORPORATION OR ORGANIZATION|JURISDICTION|JURISDICTION OF INCORPORATION OR\nORGANIZATION",
          "NAME|ORGANIZED UNDER THE LAWS OF",
          'STATE OF ORGANIZATION',
          'STATE OR COUNTRY OF ORGANIZATION',
          'NAME OF SUBSIDIARY',
          'NAME',
          'ENTITY NAME',
          'the laws of',
          'Percentage of voting',
          'securities owned by',
          'immediate parent',
          'CERTAIN INTERMEDIARY SUBSIDIARIES',
          'PERCENT OWNED'
        )
      # Build the combined regex from the vector defined just above; no
      # `hit_terms` object exists in this function before this point.
      hit_terms <-
        hit_terms_in %>%
        str_to_upper() %>%
        paste0(collapse = '|')

      hit_terms_in <-
        hit_terms %>% str_split('\\|') %>%
        flatten_chr()

      # A column is taken as the ownership column if it holds a bare "100" or
      # a header mentioning PERCENT.
      has_pct_col <-
        all_data %>%
        filter(value %in% "100") %>%
        nrow() > 0 |
        (all_data %>% filter(value %>% str_detect('PERCENT')) %>% nrow() > 0)

      if (has_pct_col) {
        pct_col <-
          all_data %>%
          filter((value %in% "100") |
                   (value %>% str_detect("PERCENT"))) %>%
          .$item %>%
          unique() %>%
          .[[1]]
      }

      all_data <-
        all_data %>%
        filter(!value %in% hit_terms_in) %>%
        filter(!value %>% str_detect(hit_terms)) %>%
        filter(!value == '') %>%
        mutate(valueNC = value %>% nchar()) %>%
        filter(!value %>% str_detect("PERCENT"))

      # Without a percent column, very short cells are dropped; with one they
      # are kept so values such as "100" survive.
      if (!has_pct_col) {
        all_data <-
          all_data %>%
          filter(valueNC > 3)
      }
      all_data <-
        all_data %>%
        select(-valueNC) %>%
        group_by(item) %>%
        mutate(idSubsidiary = row_number()) %>%
        spread(item, value) %>%
        ungroup() %>%
        dplyr::rename(nameSubsidiary = X1)

      if (has_loc_key) {
        names(all_data)[names(all_data) %in% loc_col] <-
          'locationOrganizationSubsidiary'
      }

      if (has_pct_col) {
        names(all_data)[names(all_data) %in% pct_col] <-
          'pctSubsidiaryOwned'

        all_data <-
          all_data %>%
          mutate(pctSubsidiaryOwned = pctSubsidiaryOwned %>% as.numeric() / 100)
      }

      all_data <-
        all_data %>%
        mutate(idCIK = cik,
               dateSubsidiaryAsOf = NA,
               urlSEC = url) %>%
        select(-dplyr::matches("idSubsidiary|^X"))

      if (return_message) {
        list("Parsed: ", url) %>%
          purrr::invoke(paste0, .) %>% cat(fill = TRUE)
      }

      return(all_data)

    }

    df <-
      page %>%
      .parse_page_subsidiary_table_html() %>%
      suppressWarnings()

    df <-
      df %>%
      filter(!nameSubsidiary == '') %>%
      mutate(idCIK = cik, urlSEC = url) %>%
      select(-dplyr::matches("idSubsidiary")) %>%
      select(idCIK, everything())

    if (return_message) {
      list("Parsed: ", url) %>%
        purrr::invoke(paste0, .) %>% cat(fill = TRUE)
    }

    return(df %>% select(-dplyr::matches("idSubsidiary")))

  }

# url = 'https://www.sec.gov/Archives/edgar/data/19617/000095012301002499/y46253ex21-1.txt'
# Parse a plain-text subsidiary exhibit into a tibble of subsidiaries.
#
# url:            address of the text exhibit; read line by line.
# return_message: when TRUE, prints "Parsed: <url>" after a successful parse.
#
# Each remaining line is split into fields on runs of three spaces (and on
# ":"). Two fields are read as name + location, three as name + location +
# ownership percentage; lines with any other field count are skipped.
#
# Returns a tibble with `idCIK`, `nameSubsidiary`,
# `locationOrganizationSubsidiary`, `isChildSubsidiary`, `urlSEC` and, when a
# third field was present, `pctSubsidiaryOwned`. Returns an empty tibble when
# no line could be parsed.
.parse_sec_subsidiary_url_text <-
  function(url = "https://www.sec.gov/Archives/edgar/data/899689/000104746903007996/a2104897zex-21.txt",
           return_message = TRUE) {
    cik <-
      url %>%
      .parse_sec_url_for_cik()
    data <-
      url %>%
      read_lines()

    data <-
      data[!data == '']
    has_s <-
      data %>% str_detect("<S>") %>% sum() > 0

    if (has_s) {
      # Keep only what follows the first "<S>" marker. A logical mask is used
      # instead of `(i + 1):length(data)`, which would count backwards when
      # the marker is the last line.
      first_s <-
        data %>% grep("<S>", .) %>% .[[1]]
      data <-
        data[seq_along(data) > first_s]
    }

    # Drop header rows, rulers and remaining markup lines.
    data <-
      data[!data %>% str_detect("STATE OF|NAME OF|---|NAME OF SUBSIDIARY|ORGANIZED UNDER|THE LAWS OF|<")]

    data <-
      data[data %>% nchar() > 3]

    df <-
      seq_along(data) %>%
      future_map_dfr(function(x) {
        item <-
          data[[x]]

        items <-
          item %>%
          str_replace_all('\\   ', '\\:') %>%
          str_split('\\:') %>%
          flatten_chr() %>%
          str_trim() %>%
          str_to_upper()

        items <-
          items[!items == '']

        n_items <-
          length(items)

        # Only 2- and 3-field lines have a defined layout; everything else
        # (blank after cleaning, a lone field, 4+ fields) contributes no row.
        if (!n_items %in% c(2, 3)) {
          return(tibble())
        }

        table_data <-
          tibble(
            idSubsidiary = x,
            nameSubsidiary = items[[1]],
            locationOrganizationSubsidiary = items[[2]]
          )

        if (n_items == 3) {
          table_data <-
            table_data %>%
            mutate(pctSubsidiaryOwned = items[[3]] %>% as.numeric() / 100)
        }

        # A leading "-" flags the row as a child entry; the first "-" is then
        # removed from the name.
        table_data <-
          table_data %>%
          mutate(
            isChildSubsidiary = ifelse(nameSubsidiary %>% substr(1, 1) == "-", TRUE, FALSE),
            nameSubsidiary = nameSubsidiary %>% str_replace('\\-', '') %>% str_trim()
          )
        return(table_data)
      })

    if (nrow(df) == 0) {
      return(tibble())
    }

    df <-
      df %>%
      mutate(idCIK = cik, urlSEC = url) %>%
      select(-dplyr::matches("idSubsidiary")) %>%
      select(idCIK,
             nameSubsidiary,
             locationOrganizationSubsidiary,
             everything()) %>%
      filter(!nameSubsidiary %in% c('NAME', 'ORGANIZED UNDER'))

    df <-
      df %>%
      filter(!nameSubsidiary == '')

    if (return_message) {
      list("Parsed: ", url) %>%
        purrr::invoke(paste0, .) %>% cat(fill = TRUE)
    }

    return(df)

  }

# Dispatch a subsidiary exhibit URL to the text or HTML parser.
#
# url:            address of the exhibit; the parser is chosen by whether the
#                 URL contains "txt" or "html"/"htm" (HTML wins if both match).
# return_message: forwarded to the chosen parser.
#
# Both parsers are wrapped with `purrr::possibly()`, so a failed parse yields
# an empty tibble rather than an error. An empty tibble is also returned when
# the URL matches neither format.
.parse_sec_subsidiary_url  <-
  function(url = "https://www.sec.gov/Archives/edgar/data/34088/000003408816000065/xomexhibit21.htm",
           return_message = TRUE)  {
    is_text <-
      url %>%
      str_detect("txt")

    is_html <-
      url %>%
      str_detect("html|htm")

    parse_sec_subsidiary_url_text_safe <-
      purrr::possibly(.parse_sec_subsidiary_url_text, tibble())

    parse_sec_subsidiary_url_html_safe <-
      purrr::possibly(.parse_sec_subsidiary_url_html, tibble())

    # Explicit default: without it an unmatched URL would leave `data`
    # unassigned here and the name would resolve to something outside this
    # function.
    data <-
      tibble()

    if (is_html) {
      data <-
        parse_sec_subsidiary_url_html_safe(url = url, return_message = return_message)
    } else if (is_text) {
      data <-
        parse_sec_subsidiary_url_text_safe(url = url, return_message = return_message)
    }

    return(data)
  }


# form_parsing ------------------------------------------------------------


# Break raw, dot-delimited SEC form field paths into their table name, item
# counter and leaf field name, then attach the dictionary entry for each leaf.
#
# sec_names: character vector of full field paths.
#
# Stops with a "Missing:" error listing every leaf name that has no entry in
# the dictionary returned by `.sec_form_title_df()`.
#
# The result has `nameTable`, `nameSECFull`, `nameSEC`, `nameActual` first,
# followed by the remaining columns.
.parse_full_form_names <-
  function(sec_names) {
    parsed_names <-
      future_map_dfr(seq_along(sec_names), function(idx) {
        full_name <- sec_names[[idx]]

        stripped_name <-
          str_replace_all(full_name, '\\.value|\\.item', '')
        parts <-
          flatten_chr(str_split(stripped_name, '\\.'))

        # Path segments that carry no digit (i.e. are not item counters).
        alpha_parts <- parts[!str_detect(parts, "[0-9]")]
        n_alpha <- length(alpha_parts)

        street_hits <-
          str_detect(parts, "street1|street2|Street1|Street2")
        has_street <- sum(street_hits, na.rm = TRUE) > 0

        leaf_name <- alpha_parts[length(alpha_parts)]

        # filingManager paths keep every segment, glued together.
        if (str_detect(full_name, 'filingManager')) {
          return(tibble(
            nameSECFull = full_name,
            nameSEC = paste0(parts, collapse = '')
          ))
        }

        # Street segments contain a digit, so they are picked explicitly.
        if (has_street) {
          leaf_name <- parts[street_hits]
        }

        signature_only <-
          str_detect(stripped_name, 'signature') && n_alpha == 1
        footnote_path <- str_detect(full_name, 'footnote')
        issuer_path <- str_detect(full_name, '\\issuer.[A-Z]')
        federal_path <-
          str_detect(full_name, pattern = "federalExemptionsExclusions")

        if (federal_path) {
          return(tibble(
            nameSECFull = full_name,
            nameTable = parts[[1]],
            nameSEC = leaf_name
          ))
        }

        if (issuer_path) {
          raw_parts <- flatten_chr(str_split(full_name, '\\.'))
          item_count <-
            suppressWarnings(readr::parse_number(as.character(parts[2])))

          return(tibble(
            nameSECFull = full_name,
            nameTable = 'issuer',
            countItem = item_count,
            nameSEC = raw_parts[length(raw_parts)]
          ))
        }

        if (footnote_path) {
          item_count <-
            if (length(parts) == 1) {
              0
            } else {
              suppressWarnings(readr::parse_number(as.character(parts[2])))
            }

          return(tibble(
            nameTable = 'footnotes',
            nameSECFull = full_name,
            nameSEC = parts[[1]],
            countItem = item_count
          ))
        }

        if (signature_only) {
          return(tibble(
            nameTable = 'signatures',
            nameSECFull = full_name,
            nameSEC = leaf_name
          ))
        }

        if (n_alpha == 1) {
          return(tibble(nameSECFull = full_name, nameSEC = leaf_name))
        }

        n_parts <- length(parts)

        if (n_parts == 1) {
          return(tibble(nameSECFull = full_name, nameSEC = full_name))
        }

        if (n_parts == 2 & !footnote_path) {
          return(tibble(
            nameSECFull = full_name,
            nameTable = parts[[1]],
            nameSEC = leaf_name
          ))
        }

        if (n_parts > 2) {
          item_count <-
            suppressWarnings(readr::parse_number(as.character(parts[2])))

          return(tibble(
            nameSECFull = full_name,
            nameTable = parts[[1]],
            countItem = item_count,
            nameSEC = leaf_name
          ))
        }
      })

    parsed_names <- filter(parsed_names, !nameSEC == '')

    dictionary <- .sec_form_title_df()

    # Every parsed leaf name must be known to the dictionary.
    unknown_mask <- !parsed_names$nameSEC %in% dictionary$nameSEC
    if (length(parsed_names$nameSEC[unknown_mask]) > 0) {
      unknown_names <- unique(parsed_names$nameSEC[unknown_mask])
      stop(paste0("Missing:\n", paste0(unknown_names, collapse = '\n')))
    }

    parsed_names <-
      suppressMessages(suppressWarnings(left_join(parsed_names, dictionary)))

    if (!'nameTable' %in% names(parsed_names)) {
      parsed_names <- mutate(parsed_names, nameTable = 'asset')
    }

    # The closing expression is an assignment, so the value comes back
    # invisibly.
    parsed_names <-
      suppressMessages(suppressWarnings(
        parsed_names %>%
          select(nameTable, nameSECFull, nameSEC, nameActual, everything()) %>%
          mutate(
            nameTable = str_replace(nameTable, 'Id', ''),
            nameTable = ifelse(
              nameTable %in% c('issuerCredentials', 'securitiesIssued'),
              NA,
              nameTable
            )
          )
      ))
  }

# Read an SEC XML form and flatten it into a long name/value table.
#
# url: address of the XML document.
#
# For every distinct top-level element name the matching nodes are converted
# to a list, round-tripped through JSON to obtain a flat data frame, and
# gathered into rows of (`idTable`, `nameSECFull`, `value`), where `idTable`
# is the position of the element name among the distinct top-level names.
# Rows whose value contains the text "list" are dropped and common path
# prefixes are stripped from `nameSECFull`.
.parse_xml_tables <-
  function(url = "https://www.sec.gov/Archives/edgar/data/61004/000114036117000046/doc1.xml"){
    page <-
      url %>%
      xml2::read_xml()

    tables <-
      page %>%
      xml_contents() %>%
      xml_name() %>%
      unique()

    data <-
      seq_along(tables) %>%
      future_map_dfr(function(x){
        table <-
          tables[[x]]

        if (table %in% c('headerData', 'formData')) {
          form_tables <-
            page %>% xml_contents() %>% xml_name()

          # Exact, first-occurrence lookup: `[[` needs a single index, which
          # a regex search over the names does not guarantee.
          table_loc <-
            match(table, form_tables)
          xml_nodes <-
            page %>%
            xml_contents() %>% .[[table_loc]]
        }

        if (table %in% c('infoTable' , 'assets')) {
          xml_nodes <-
            page %>%
            xml_contents()
        }

        if (table == 'comment') {
          value <-
            page %>% xml_contents() %>% xml_text()

          df <-
            tibble(idTable = x, nameSECFull = table, value)
          return(df)

        }

        tables_special <- c('headerData', 'formData', 'infoTable', 'assets')

        if (!table %in% tables_special) {

          value_search <-
            list('//', table) %>% purrr::reduce(paste0)

          xml_nodes <-
            page %>%
            xml_contents() %>%
            xml_find_all(value_search)
        }
        if (xml_nodes %>% length() > 100) {
          list("Be patient there are ", xml_nodes %>% length() %>% formattable::comma(digits = 0), ' nodes to parse') %>%
            purrr::reduce(paste0) %>% cat(fill = TRUE)
        }
        value_list <-
          xml_nodes %>% as_list()

        value_list <-
          value_list[value_list %>% future_map(length) %>% flatten_dbl() > 0]

        json_data <-
          value_list %>%
          jsonlite::toJSON(force = FALSE, dataframe = 'values') %>%
          jsonlite::fromJSON(simplifyDataFrame = TRUE, flatten = TRUE)

        # TRUE only for arrays that are not matrices. Comparing `class()`
        # with `==` yields a length-2 condition for matrices (class
        # c("matrix", "array")), which `if` rejects on current R.
        wrong_output <-
          is.array(json_data) && !is.matrix(json_data)

        if (wrong_output) {
          # Fall back to one column per node name.
          item <-
            xml_nodes %>% xml_name()
          value <-
            xml_nodes %>% xml_text()
          json_data <-
            tibble(item, value) %>%
            spread(item, value)
        }

        if (json_data %>% length() == 0) {
          return(tibble())
        }
        if ('summaryInfo' %in% names(json_data)) {
          # Remove empty `clarificationResponses` entries, then drop elements
          # that would produce a zero-row data frame.
          json_data <-
            seq_along(json_data) %>% map(
              function(x){
                js_d <- json_data[x]
                if ('summaryInfo' %in% names(js_d)) {
                  if (js_d$summaryInfo$clarificationResponses %>% length() == 0) {
                    js_d$summaryInfo$clarificationResponses <-
                      NULL
                  }
                }
                return(js_d)
              }) %>%
            flatten()

          has_rows <-
            json_data %>%
            future_map(function(x) {
              nrow(data.frame(x, stringsAsFactors = FALSE))
            }) > 0

          json_data <-
            json_data[has_rows]
        }

        json_data <-
          json_data %>%
          data.frame(stringsAsFactors = FALSE) %>%
          as_tibble() %>%
          mutate_all(as.character) %>%
          mutate(idTable = x) %>%
          gather(nameSECFull, value, -idTable) %>%
          arrange(idTable)
        return(json_data)
      })

    # NOTE(review): in the first pattern, `\\f` may be read by the regex
    # engine as a form-feed escape, in which case the `\\flags.` alternative
    # never matches the literal text "flags." — confirm before changing it.
    data <-
      data %>%
      mutate(isList = value %>% str_detect('list')) %>%
      filter(!isList) %>%
      select(-isList) %>%
      mutate(
        nameSECFull = nameSECFull %>% str_replace_all(
          "filerInfo.flags.|filerInfo.filer.|coverPage.|.filer.|\\flags.|filer.credentials.",
          ''
        ),
        nameSECFull = nameSECFull %>% str_replace_all('filerInfo.|issuerCredentials.', '')
      )

    return(data)
  }

# Parse one SEC XML form document into a wide tibble.
#
# The long name/value table produced by `.parse_xml_tables()` is labelled with
# `.parse_full_form_names()`. Rows without a `nameTable` become filing-level
# metadata columns; every other `nameTable` is spread wide, nested into a
# `data<Table>` list-column next to `urlSECFiling`, and left-joined onto the
# metadata.
#
# Args:
#   url: URL of the XML form document.
#   return_message: when TRUE, prints "Parsed: <url>" once parsing finishes.
#
# Returns:
#   A tibble of metadata columns plus one nested `data*` column per table.
#   When the document yields no named tables, the metadata tibble is returned
#   on its own.
.parse_sec_form <-
  function(url = "https://www.sec.gov/Archives/edgar/data/61004/000114036117000046/doc1.xml",
           return_message = TRUE) {
    data <-
      .parse_xml_tables(url = url)

    if (!'nameSECFull' %in% names(data)) {
      data <-
        data %>%
        mutate(nameSECFull = nameSEC)
    }

    # The CIK is the first path segment after ".../edgar/data/".
    cik <-
      url %>% str_replace_all('https://www.sec.gov/Archives/edgar/data/', '') %>% str_split('/') %>% flatten_chr() %>% .[[1]] %>% as.character() %>% readr::parse_number() %>% suppressMessages()

    is_13FInfo <-
      url %>% str_detect('form13fInfoTable.xml|infotable.xml')
    sec_names <-
      data$nameSECFull %>% unique()

    df_names <-
      .parse_full_form_names(sec_names = sec_names)

    # Override the table assignment for name prefixes that identify a table.
    df_names <-
      df_names %>%
      mutate(nameTable = ifelse(
        nameSECFull %>% str_detect("issuerAddress"),
        "issuerAddress",
        nameTable),
        nameTable =  ifelse(
          nameSECFull %>% str_detect("reportingOwner"),
          "reportingOwner",
          nameTable)
      ) %>%
      mutate(nameTable = ifelse(nameSECFull %>% str_detect("issuerInfo."), 'issuerInfo', nameTable),
             nameTable = ifelse(nameSECFull %>% str_detect("securitiesIssued."), 'securitiesIssued', nameTable),
             nameTable = ifelse(nameSECFull %>% str_detect("summaryInfo."), 'summaryInfo', nameTable),
             nameTable = ifelse(nameSECFull %>% str_detect("^comment[A-Z]"), 'Comments', nameTable)
      )

    if (is_13FInfo) {
      df_names <-
        df_names %>%
        mutate(nameTable = 'holdingsInformation')
    }
    if (!'nameSEC' %in% names(data)) {
      data <- data %>%
        mutate(nameSEC = nameSECFull)
    }
    data <-
      data %>%
      select(-nameSEC) %>%
      left_join(df_names) %>%
      mutate(nameActual = ifelse(nameSECFull == "X.1.A.A.", 'idForm', nameActual)) %>%
      suppressMessages()

    if ('countItem' %in% names(data)) {
      data <-
        data %>%
        select(nameTable, countItem, nameSECFull, nameActual, everything()) %>%
        mutate(countItem = countItem - 1) %>%
        suppressMessages()
    }

    if ('property' %in% data$nameTable) {
      data <-
        data %>%
        mutate(nameTable = ifelse(nameTable %>% is.na(), 'Asset', nameTable))
    }

    # Rows that belong to no table are treated as filing-level metadata.
    has_metadata <-
      data %>%
      filter(nameTable %>% is.na()) %>% nrow() > 0

    if (has_metadata) {
      df_metadata <-
        data %>%
        filter(nameTable %>% is.na()) %>%
        select(nameActual, value) %>%
        group_by(nameActual) %>%
        mutate(countItem = seq_len(n()) - 1) %>%
        arrange(countItem) %>%
        ungroup() %>%
        filter(!nameActual %>% str_detect('idCCC')) %>%
        # Repeated names get a numeric suffix so they stay unique when spread.
        mutate(nameActual = ifelse(countItem == 0, nameActual, nameActual %>% paste0(countItem))) %>%
        select(-countItem)

      col_order <-
        df_metadata$nameActual

      df_metadata <-
        df_metadata %>%
        spread(nameActual, value) %>%
        select(one_of(col_order)) %>%
        mutate(urlSECFiling = url) %>%
        .resolve_form_columns()
    } else {
      df_metadata <-
        tibble(idCIKFiler = cik,
               urlSECFiling = url)
    }

    tables <-
      data %>%
      filter(!nameTable %>% is.na()) %>%
      .$nameTable %>%
      unique()

    if (length(tables) == 0) {
      # `reduce()` without `.init` errors on an empty list, so a document with
      # no named tables yields just its metadata.
      data <-
        df_metadata
    } else {
      data <-
        seq_along(tables) %>%
        future_map(function(x) {
          table <-
            tables[[x]]
          # Column name for the nested table: "data" + capitalised table name.
          table_name <-
            list('data',
                 table %>% substr(1, 1) %>% str_to_upper(),
                 table %>% substr(2, nchar(table))) %>%
            purrr::reduce(paste0)

          table_df <-
            data %>%
            filter(nameTable == table) %>%
            select(dplyr::matches("countItem"), nameActual, value) %>%
            # Drop columns that are entirely NA.
            select(which(colMeans(is.na(.)) < 1)) %>%
            group_by(nameActual) %>%
            mutate(countItem = seq_len(n()) - 1) %>%
            ungroup()

          # A name occurring more than once means the table has several rows.
          has_counts <-
            table_df$countItem %>% max(na.rm = TRUE) > 0

          if (has_counts) {
            table_df <-
              table_df %>%
              arrange(countItem)

            col_order <- c('countItem', table_df$nameActual)

            table_df <-
              table_df %>%
              spread(nameActual, value) %>%
              select(one_of(col_order)) %>%
              mutate(urlSECFiling = url) %>%
              .resolve_form_columns()

            table_df <-
              table_df %>%
              nest(-urlSECFiling, .key = data)
          } else {
            table_df <-
              table_df %>%
              select(-countItem)
            col_order <- c(table_df$nameActual)

            table_df <-
              table_df %>%
              spread(nameActual, value) %>%
              select(one_of(col_order)) %>%
              .resolve_form_columns() %>%
              mutate(urlSECFiling = url)

            table_df <-
              table_df %>%
              nest(-urlSECFiling, .key = data)
          }
          names(table_df)[[2]] <-
            table_name

          df_metadata %>%
            left_join(table_df) %>%
            suppressMessages()
        }) %>%
        reduce(left_join) %>%
        suppressMessages()
    }

    ## maybe add IDCK

    if (return_message) {
      cat(paste0("Parsed: ", url), fill = TRUE)
    }

    return(data)
  }

# Parse every distinct filing URL in `.all_filings` selected by
# `filter_parameter` and attach the filing identifiers to the parsed data.
#
# Args:
#   .all_filings: data frame of filings; must contain `urlSECFiling` and the
#     column(s) referenced by `filter_parameter`.
#   filter_parameter: filter expression given as a string (by default the name
#     of a logical column). 'isXBRLInstanceFile' routes URLs through
#     `.parse_xbrl_filer_url()`; anything else goes through `.parse_sec_form()`.
#   return_message: forwarded to the per-URL parser.
#
# Returns:
#   A tibble of parsed filings, or an empty tibble when nothing matches (or,
#   for XBRL, when no URL could be parsed).
.parse_form_data <-
  function(.all_filings, filter_parameter = 'isXBRLInstanceFile', return_message = TRUE) {
    # NOTE(review): filter_() is deprecated in dplyr; it is kept here because
    # `filter_parameter` is evaluated as an expression string.
    df_search <-
      .all_filings %>%
      filter_(.dots = filter_parameter)

    if (filter_parameter == 'isXBRLInstanceFile') {
      if (df_search %>% nrow() == 0) {
        return(tibble())
      }
      # A single unparsable filing must not abort the whole batch; failures
      # are reported (quiet = FALSE) and contribute no rows.
      parse_xbrl_filer_url_safe <-
        purrr::possibly(.parse_xbrl_filer_url, otherwise = tibble(), quiet = FALSE)
      all_data <-
        df_search$urlSECFiling %>%
        unique() %>%
        future_map_dfr(function(x) {
          parse_xbrl_filer_url_safe(url = x, return_message = return_message)
        })

      # Every URL failed: there is nothing to join the filing ids onto.
      if (all_data %>% nrow() == 0) {
        return(tibble())
      }

      all_data <-
        all_data %>%
        select(-dplyr::matches("idCIK1|nameFiler1")) %>%
        left_join(df_search %>% select(idForm, idAccession, nameFile, dateFiling, urlSECFiling)) %>%
        select(
          dplyr::matches("idCIK"),
          dplyr::matches("nameEntity|nameFiler"),
          dateFiling,
          idForm,
          idAccession,
          nameFile,
          everything()
        ) %>%
        suppressMessages()

      return(all_data)
    }

    if (filter_parameter == 'isFormD') {
      if ('idForm' %in% names(df_search)){
        df_search <-
          df_search %>%
          filter(!idForm %>% str_detect("10"))
      }
    }
    if (df_search %>% nrow() == 0) {
      return(tibble())
    }
    all_data <-
      df_search$urlSECFiling %>%
      unique() %>%
      future_map_dfr(function(x) {
        .parse_sec_form(url = x, return_message = return_message)
      })

    # The id columns are optional in `df_search`, so both the join and the
    # reordering select them with matches() rather than by bare name.
    all_data <-
      all_data %>%
      select(-dplyr::matches("idCIK1|nameFiler1")) %>%
      left_join(df_search %>% select(dplyr::matches("idForm"), dplyr::matches("idAccession"), dplyr::matches("nameFile"), dplyr::matches("dateFiling"), urlSECFiling)) %>%
      select(
        dplyr::matches("idCIK"),
        dplyr::matches("nameEntity|nameFiler"),
        dplyr::matches("dateFiling"),
        dplyr::matches("idForm"),
        dplyr::matches("idAccession"),
        dplyr::matches("nameFile"),
        everything()
      ) %>%
      suppressMessages()

    if (filter_parameter == 'hasAssetFile') {
      if('dataComments' %in% names(all_data)) {
        # Keep the non-empty comment tables aside, drop rows without asset
        # data, then attach the comments back to the remaining rows.
        df_comments <-
          all_data %>%
          select(idCIKFiler, idAccession, dataComments) %>%
          mutate(isNULL = dataComments %>% map_lgl(is_null)) %>%
          filter(!isNULL) %>%
          distinct() %>%
          select(-isNULL)

        all_data <-
          all_data %>%
          select(-dataComments) %>%
          mutate(isNULL = dataAsset %>% map_lgl(is_null)) %>%
          filter(!isNULL) %>%
          filter(!nameFile == "ASSET RELATED DOCUMENT") %>%
          distinct() %>%
          select(-isNULL) %>%
          left_join(df_comments) %>%
          suppressMessages()
      }
    }
    return(all_data)
  }


# XBRL Finder -------------------------------------------------------------
# Download an XBRL instance document and its sibling schema/linkbase files and
# return their parsed contents as one row of nested data frames.
#
# Sibling URLs are derived by replacing the trailing ".xml" of `url` with
# ".xsd", "_cal.xml", "_def.xml" and "_lab.xml". The calculation linkbase is
# optional: when `httr::url_ok()` reports it unavailable an empty tibble is
# stored instead. The presentation linkbase ("_pre.xml") is not part of the
# returned tibble, so it is not fetched.
#
# Args:
#   url: URL of the XBRL instance document.
#   return_message: when TRUE, prints "Parsed: <url>" once parsing finishes.
#
# Returns:
#   A one-row tibble with `idCIK`, `urlSECFiling` and the list-columns
#   `dataFacts`, `dataContexts`, `dataUnits`, `dataFootnotes`, `dataRoles`,
#   `dataCalculations`, `dataDefinitions` and `dataLabel`.
#
# Side effects:
#   Sets the session options `stringsAsFactors` and `scipen` (left in place,
#   as callers may rely on them) and performs network downloads.
.parse_xbrl_filer_url <-
  function(url = "https://www.sec.gov/Archives/edgar/data/1037540/000165642316000023/bxp-20160930.xml",
           return_message = TRUE) {
    options(stringsAsFactors = FALSE, scipen = 999999)

    # The CIK is the first path segment after "data/".
    cik <-
      url %>%
      str_split('data/') %>%
      flatten_chr() %>%
      .[[2]] %>%
      str_split('/') %>%
      flatten_chr() %>%
      .[[1]] %>%
      as.numeric()

    # One scratch file is reused for every download and removed on exit,
    # including when a download or parse step fails.
    tf <-
      tempfile(fileext = ".xml")
    on.exit(unlink(tf), add = TRUE)

    # Build a sibling URL by swapping the trailing ".xml" for `suffix`. The
    # pattern is anchored and the dot escaped so that only the file extension
    # is ever rewritten.
    sibling_url <- function(suffix) {
      url %>% str_replace("\\.xml$", suffix)
    }

    # Download `doc_url`, parse it, hand the parsed document to `process`, and
    # always free the external memory held by the parsed document.
    with_xbrl_doc <- function(doc_url, process) {
      doc_url %>%
        curl::curl_download(destfile = tf)
      parsed_doc <-
        tf %>%
        XBRL::xbrlParse()
      on.exit(XBRL::xbrlFree(parsed_doc), add = TRUE)
      process(parsed_doc)
    }

    ## Instance document: facts, contexts, units and footnotes
    instance <-
      with_xbrl_doc(url, function(doc) {
        list(
          facts = XBRL::xbrlProcessFacts(doc) %>% as_tibble(),
          contexts = XBRL::xbrlProcessContexts(doc) %>% as_tibble(),
          units = XBRL::xbrlProcessUnits(doc) %>% as_tibble(),
          footnotes = XBRL::xbrlProcessFootnotes(doc) %>% as_tibble()
        )
      })

    # Flag facts that parse as numbers and keep the parsed amount; split the
    # element id into its prefix and name at the underscore.
    df_fct <-
      instance$facts %>%
      mutate(
        isNumber = !is.na(readr::parse_number(as.character(fact))),
        amountFact = ifelse(isNumber, fact %>% as.character() %>% readr::parse_number(), NA)
      ) %>%
      separate(elementId,
               c('codeElement', 'nameElement'),
               sep = '\\_',
               remove = FALSE) %>%
      suppressWarnings()
    df_cts <-
      instance$contexts
    df_unt <-
      instance$units
    df_footnotes <-
      instance$footnotes

    ## Schema file: roles
    df_rls <-
      with_xbrl_doc(sibling_url(".xsd"), function(doc) {
        doc %>%
          XBRL::xbrlProcessRoles() %>%
          as_tibble()
      })

    ## calculation
    url_cal <-
      sibling_url("_cal.xml")
    if (httr::url_ok(url_cal) %>% suppressWarnings()){
      df_calcs <-
        with_xbrl_doc(url_cal, function(doc) {
          doc %>%
            XBRL::xbrlProcessArcs(arcType = 'calculation') %>%
            as_tibble()
        })
    } else {
      df_calcs <-
        tibble()
    }

    ## definition
    df_defs <-
      with_xbrl_doc(sibling_url("_def.xml"), function(doc) {
        doc %>%
          XBRL::xbrlProcessArcs(arcType = 'definition') %>%
          as_tibble()
      })

    ## labels
    df_labels <-
      with_xbrl_doc(sibling_url("_lab.xml"), function(doc) {
        doc %>%
          XBRL::xbrlProcessLabels() %>%
          as_tibble()
      })

    data <-
      tibble(
        idCIK = cik,
        urlSECFiling = url,
        dataFacts = list(df_fct),
        dataContexts = list(df_cts),
        dataUnits = list(df_unt),
        dataFootnotes = list(df_footnotes),
        dataRoles = list(df_rls),
        dataCalculations = list(df_calcs) ,
        dataDefinitions = list(df_defs),
        dataLabel = list(df_labels)
      )

    if (return_message) {
      cat(paste0("Parsed: ", url), fill = TRUE)
    }
    return(data)
  }

# dictionaries ------------------------------------------------------------
.sec_form_title_df <-
  function() {
    tibble(
      nameSEC = c(
        "conversionOrExercisePrice",
        "deemedExecutionDate",
        "directOrIndirectOwnership",
        "documentType",
        "equitySwapInvolved",
        "exerciseDate",
        "expirationDate",
        "footnote",
        "isDirector",
        "isOfficer",
        "isOther",
        "issuerCik",
        "issuerName",
        "issuerTradingSymbol",
        "isTenPercentOwner",
        "natureOfOwnership",
        "noSecuritiesOwned",
        "notSubjectToSection16",
        "officerTitle",
        "otherText",
        "periodOfReport",
        "postTransactionAmountsOwnedFollowingTransaction",
        "remarks",
        "rptOwnerCik",
        "rptOwnerCity",
        "rptOwnerName",
        "rptOwnerState",
        "rptOwnerStateDescription",
        "rptOwnerStreet1",
        "rptOwnerStreet2",
        "rptOwnerZipCode",
        "schemaVersion",
        "securityTitle",
        "sharesOwnedFollowingTransaction",
        "signatureDate",
        "signatureName",
        "transactionAcquiredDisposedCode",
        "transactionCode",
        "transactionDate",
        "transactionFormType",
        "transactionPricePerShare",
        "transactionShares",
        "transactionTimeliness",
        "transactionTotalValue",
        "underlyingSecurityShares",
        "underlyingSecurityTitle",
        "clarificationOfResponse", "isBusinessCombinationTransaction",
        "cik", "moreThanOneYear", "previousName", "edgarPreviousNameList",
        "entityName", "entityType", "entityTypeOtherDesc", "federalExemptionsExclusions",
        "industryGroupType", "investmentFundType", "investmentFundInfo",
        "hasNonAccreditedInvestors", "numberNonAccreditedInvestors",
        "totalNumberAlreadyInvested", "city", "stateOrCountry", "stateOrCountryDescription",
        "street1", "street2", "zipCode", "issuerPhoneNumber", "issuerPreviousNameList",
        "jurisdictionOfInc", "overFiveYears", "yearOfInc", "withinFiveYears",
        "yetToBeFormed", "aggregateNetAssetValueRange", "revenueRange",
        "minimumInvestmentAccepted", "totalAmountSold", "totalOfferingAmount",
        "totalRemaining", "firstName", "lastName", "middleName", "relationship",
        "relationshipClarification", "dollarAmount", "isEstimate", "associatedBDCRDNumber",
        "associatedBDName", "foreignSolicitation", "recipientCRDNumber",
        "recipientName", "description", "state", "statesOfSolicitationList",
        "authorizedRepresentative", "nameOfSigner", "signatureTitle",
        "submissionType", "testOrLive", "dateOfFirstSale", "yetToOccur",
        "isAmendment", "descriptionOfOtherType", "isDebtType", "isEquityType",
        "isMineralPropertyType", "isOptionToAcquireType", "isOtherType",
        "isPooledInvestmentFundType", "isSecurityToBeAcquiredType", "isTenantInCommonType",
        'notSubjectToSection16', 'rptOwnerStreet1', 'rptOwnerStreet2',

        "liveTestFlag", "confirmingCopyFlag", "returnCopyFlag", "overrideInternetFlag",
        "ccc", "reportCalendarOrQuarter", "filingManagername", "filingManageraddressstreet1",
        "filingManageraddressstreet2", "filingManageraddresscity", "filingManageraddressstateOrCountry",
        'filingManagerstateOrCountryDescription',
        "filingManageraddresszipCode", "reportType", "form13FFileNumber",
        "provideInfoForInstruction5", "name", "title", "phone", "signature",
        "otherIncludedManagersCount", "tableEntryTotal", "tableValueTotal",
        "isConfidentialOmitted",
        "nameOfIssuer", "titleOfClass", "cusip", "value", "investmentDiscretion",
        "otherManager", "putCall", "sshPrnamt", "sshPrnamtType", "Sole",
        "Shared", "None",

        "offeringFileNumber", "sinceLastFiling", "jurisdictionOrganization",
        "yearIncorporation", "sicCode", "irsNum", "fullTimeEmployees",
        "partTimeEmployees", "phoneNumber", "connectionName", "industryGroup",
        "cashEquivalents", "investmentSecurities", "accountsReceivable",
        "propertyPlantEquipment", "totalAssets", "accountsPayable", "longTermDebt",
        "totalLiabilities", "totalStockholderEquity", "totalLiabilitiesAndEquity",
        "totalRevenues", "costAndExpensesApplToRevenues", "depreciationAndAmortization",
        "netIncome", "earningsPerShareBasic", "earningsPerShareDiluted",
        "nameAuditor", "commonEquityClassName", "outstandingCommonEquity",
        "commonCusipEquity", "publiclyTradedCommonEquity", "preferredEquityClassName",
        "outstandingPreferredEquity", "preferredCusipEquity", "publiclyTradedPreferredEquity",
        "debtSecuritiesClassName", "outstandingDebtSecurities", "cusipDebtSecurities",
        "publiclyTradedDebtSecurities", "certifyIfTrue", "certifyIfNotDisqualified",
        "summaryInfo", "financialStatementAuditStatus", "securitiesOfferedTypes",
        "offerDelayedContinuousFlag", "offeringYearFlag", "offeringAfterQualifFlag",
        "offeringBestEffortsFlag", "solicitationProposedOfferingFlag",
        "resaleSecuritiesAffiliatesFlag", "securitiesOffered", "outstandingSecurities",
        "pricePerSecurity", "issuerAggregateOffering", "securityHolderAggegate",
        "qualificationOfferingAggregate", "concurrentOfferingAggregate",
        "totalAggregateOffering", "underwritersServiceProviderName",
        "underwritersFees", "auditorServiceProviderName", "auditorFees",
        "legalServiceProviderName", "legalFees", "promotersServiceProviderName",
        "promotersFees", "brokerDealerCrdNumber", "estimatedNetAmount",
        "clarificationResponses", "jurisdictionsOfSecOfferedSame", "issueJuridicationSecuritiesOffering",
        "dealersJuridicationSecuritiesOffering", "securitiesIssuerName",
        "securitiesIssuerTitle", "securitiesIssuedTotalAmount", "securitiesPrincipalHolderAmount",
        "securitiesIssuedAggregateAmount", "securitiesActExcemption",
        "certifyIfBadActor", "salesCommissionsServiceProviderName",
        "salesCommissionsServiceProviderFees", "jurisdictionsOfSecOfferedNone",
        "ifUnregsiteredNone", "blueSkyServiceProviderName", "blueSkyFees",
        'indicateTier1Tier2Offering', 'X.1.A.A.', 'X.1.A.A.', 'aggregateConsiderationBasis',
        'findersFeesServiceProviderName' , 'finderFeesFee',
        'loans', 'propertyAndEquipment', 'deposits', 'totalInterestIncome',
        'totalInterestExpenses', 'securitiesOfferedOtherDesc', 'comment',
        "assetTypeNumber",
        "assetNumber",
        "assetGroupNumber",
        "reportPeriodBeginningDate",
        "reportPeriodEndDate",
        "issuerName",
        "originalIssuanceDate",
        "originalSecurityAmount",
        "originalSecurityTermNumber",
        "securityMaturityDate",
        "originalAmortizationTermNumber",
        "originalInterestRatePercentage",
        "accrualTypeCode",
        "interestRateTypeCode",
        "originalInterestOnlyTermNumber",
        "firstPaymentDate",
        "underwritingIndicator",
        "securityTitleName",
        "denominationNumber",
        "currencyName",
        "trusteeName",
        "secFileNumber",
        "cik",
        "callableIndicator",
        "paymentFrequencyCode",
        "zeroCouponIndicator",
        "assetAddedIndicator",
        "assetModifiedIndicator",
        "reportPeriodBeginningAssetBalanceAmount",
        "reportPeriodBeginningScheduledAssetBalanceAmount",
        "reportPeriodScheduledPaymentAmount",
        "reportPeriodInterestRatePercentage",
        "totalActualPaidAmount",
        "actualInterestCollectionPercentage",
        "actualPrincipalCollectedAmount",
        "actualOtherCollectionAmount",
        "otherPrincipalAdjustmentAmount",
        "otherInterestAdjustmentAmount",
        "scheduledInterestAmount",
        "scheduledPrincipalAmount",
        "endReportingPeriodActualBalanceAmount",
        "endReportingPeriodScheduledBalanceAmount",
        "servicingFeePercentage",
        "servicingFlatFeeAmount",
        "zeroBalanceCode",
        "zeroBalanceEffectiveDate",
        "remainingTermToMaturityNumber",
        "currentDelinquentStatusNumber",
        "paymentPastDueDaysNumber",
        "paymentPastDueNumber",
        "nextReportPeriodPaymentDueAmount",
        "nextDueDate",
        "primaryLoanServicerName",
        "mostRecentServicingTransferReceivedDate",
        "assetSubjectToDemandIndicator",
        "statusAssetSubjectToDemandCode",
        "repurchaseAmount",
        "demandResolutionDate",
        "repurchaserName",
        "repurchaseReplacementReasonCode",
        "reportPeriodBeginDate",
        "originalLoanPurposeCode",
        "originatorName",
        "originalLoanAmount",
        "originalLoanMaturityDate",
        "originalInterestRateTypeCode",
        "originalLienPositionCode",
        "mostRecentJuniorLoanBalanceAmount",
        "mostRecentJuniorLoanBalanceDate",
        "mostRecentSeniorLoanAmount",
        "mostRecentSeniorLoanAmountDate",
        "loanTypeMostSeniorLienCode",
        "mostSeniorLienHybridPeriodNumber",
        "mostSeniorLienNegativeAmortizationLimitPercentage",
        "mostSeniorLienOriginationDate",
        "prepaymentPenaltyIndicator",
        "negativeAmortizationIndicator",
        "modificationIndicator",
        "modificationNumber",
        "mortgageInsuranceRequirementIndicator",
        "balloonIndicator",
        "coveredHighCostCode",
        "servicerHazardInsuranceCode",
        "refinanceCashOutAmount",
        "totalOriginationDiscountAmount",
        "brokerIndicator",
        "channelCode",
        "nationalMortgageLicenseSystemCompanyNumber",
        "buyDownNumber",
        "loanDelinquencyAdvanceNumber",
        "originationARMIndexCode",
        "armMarginPercentage",
        "fullyIndexedRatePercentage",
        "initialFixedRatePeriodHybridARMNumber",
        "initialInterestRateDecreasePercentage",
        "initialInterestRateIncreasePercentage",
        "indexLookbackNumber",
        "subsequentInterestRateResetNumber",
        "lifetimeRateCeilingPercentage",
        "lifetimeRateFloorPercentage",
        "subsequentInterestRateDecreasePercentage",
        "subsequentInterestRateIncreasePercentage",
        "subsequentPaymentResetNumber",
        "armRoundCode",
        "armRoundPercentage",
        "optionArmIndicator",
        "paymentMethodAfterRecastCode",
        "initialMinimumPaymentAmount",
        "convertibleIndicator",
        "HELOCIndicator",
        "HELOCDrawNumber",
        "prepaymentPenaltyCalculationCode",
        "prepaymentPenaltyTypeCode",
        "prepaymentPenaltyTotalTermNumber",
        "prepaymentPenaltyHardTermNumber",
        "negativeAmortizationLimitAmount",
        "negativeAmortizationInitialRecastNumber",
        "negativeAmortizationSubsequentRecastNumber",
        "negativeAmortizationBalanceAmount",
        "initialFixedPaymentNumber",
        "initialPaymentCapPercentage",
        "subsequentPaymentCapPercentage",
        "initialMinimumPaymentResetNumber",
        "subsequentMinimumPaymentResetNumber",
        "minimumPaymentAmount",
        "geographicalLocation",
        "occupancyStatusCode",
        "mostRecentOccupancyStatusCode",
        "propertyTypeCode",
        "mostRecentPropertyValueAmount",
        "mostRecentPropertyValueTypeCode",
        "mostRecentPropertyValueDate",
        "mostRecentAVMModelCode",
        "mostRecentAVMConfidenceNumber",
        "originalCLTVPercentage",
        "originalLTVPercentage",
        "originalObligorNumber",
        "originalObligorCreditScoreNumber",
        "originalObligorCreditScoreType",
        "mostRecentObligorCreditScoreNumber",
        "mostRecentObligorCreditScoreType",
        "mostRecentObligorCreditScoreDate",
        "obligorIncomeVerificationLevelCode",
        "IRSForm4506TIndicator",
        "originatorFrontEndDTIPercentage",
        "originatorBackEndDTIPercentage",
        "obligorEmploymentVerificationCode",
        "obligorEmploymentLengthCode",
        "obligorAssetVerificationCode",
        "originalPledgedAssetsAmount",
        "qualificationMethodCode",
        "mortgageInsuranceCompanyName",
        "mortgageInsuranceCoveragePercentage",
        "poolInsuranceCompanyName",
        "poolInsuranceStopLossPercentage",
        "mortgageInsuranceCoverageTypeCode",
        "modificationIndicatorReportingPeriod",
        "nextPaymentDueDate",
        "advancingMethodCode",
        "servicingAdvanceMethodologyCode",
        "stopPrincipalInterestAdvancingDate",
        "reportingPeriodBeginningLoanBalanceAmount",
        "reportingPeriodBeginningScheduledLoanBalanceAmount",
        "nextReportingPeriodPaymentDueAmount",
        "reportingPeriodInterestRatePercentage",
        "nextInterestRatePercentage",
        "otherAssessedUncollectedServicerFeeamount",
        "otherServicingFeeRetainedByServicerAmount",
        "reportingPeriodEndActualBalanceAmount",
        "reportingPeriodEndScheduledBalanceAmount",
        "reportingPeriodScheduledPaymentAmount",
        "actualInterestCollectedAmount",
        "actualOtherCollectedAmount",
        "paidThroughDate",
        "interestPaidThroughDate",
        "paidFullAmount",
        "servicerAdvancedPrincipalAmount",
        "servicerAdvancedRepaidPrincipalAmount",
        "servicerAdvancedCumulativePrincipalAmount",
        "servicerAdvanceInterestAmount",
        "servicerAdvanceRepaidInterestAmount",
        "servicerAdvanceCumulativeInterestAmount",
        "servicerAdvanceTaxesInsuranceAmount",
        "servicerAdvanceRepaidTaxesInsuranceAmount",
        "servicerAdvanceCumulativeTaxesInsuranceAmount",
        "servicerAdvanceCorporateAmount",
        "servicerAdvanceRepaidCorporateAmount",
        "servicerAdvanceCumulativeCorporateAmount",
        "mostRecentTwelveMonthHistoryCode",
        "nextResetRatePercentage",
        "nextPaymentChangeDate",
        "nextInterestRateChangeDate",
        "nextResetPaymentAmount",
        "exercisedArmConversionOptionIndicator",
        "primaryServicerName",
        "masterServicerName",
        "specialServicerName",
        "subServicerName",
        "assetSubjectDemandIndicator",
        "assetSubjectDemandStatusCode",
        "repurchaseReplacementCode",
        "chargeOffPrincipalAmount",
        "chargeOffInterestAmount",
        "lossMitigationTypeCode",
        "mostRecentLoanModificationEventCode",
        "mostRecentLoanModificationEffectiveDate",
        "postModificationMaturityDate",
        "postModificationInterestRateTypeCode",
        "postModificationAmortizationTypeCode",
        "postModificationInterestPercentage",
        "postModificationFirstPaymentDate",
        "postModificationLoanBalanceAmount",
        "postModificationPrincipalInterestPaymentAmount",
        "totalCapAmount",
        "incomeVerificationIndicatorAtModification",
        "modificationFrontEndDebtToIncomePercentage",
        "modificationBackEndDebtToIncomePercentage",
        "totalDeferredAmount",
        "forgivenPrincipalCumulativeAmount",
        "forgivenPrincipalReportingPeriodAmount",
        "forgivenInterestCumulativeAmount",
        "forgivenInterestReportingPeriodAmount",
        "actualEndingBalanceTotalDebtAmount",
        "scheduledEndingBalanceTotalDebtAmount",
        "postModificationARMCode",
        "postModificationARMIndexCode",
        "postModificationMarginPercentage",
        "postModificationInterestResetNumber",
        "postModificationNextResetDate",
        "postModificationIndexLookbackNumber",
        "postModificationARMRoundingCode",
        "postModificationARMRoundingPercentage",
        "postModificationInitialMinimumPayment",
        "postModificationNextPaymentAdjustmentDate",
        "postModificationARMPaymentRecastFrequency",
        "postModificationLifetimeFloorPercentage",
        "postModificationLifetimeCeilingPercentage",
        "postModificationInitialInterestRateIncreasePercentage",
        "postModificationInitialInterestRateDecreasePercentage",
        "postModificationSubsequentInterestIncreasePercentage",
        "postModificationSubsequentInterestRateDecreasePercentage",
        "postModificationPaymentCapPercentage",
        "postModificationPaymentMethodAfterRecastCode",
        "postModificationARMInterestRateTeaserNumber",
        "postModificationARMPaymentTeaserNumber",
        "postModificationARMNegativeAmortizationIndicator",
        "postModificationARMNegativeAmortizationCapPercentage",
        "postModificationInterestOnlyTermNumber",
        "postModificationInterestOnlyLastPaymentDate",
        "postModificationBalloonAmount",
        "postModificationInterestRateStepIndicator",
        "postModificationStepInterestPercentage",
        "postModificationStepDate",
        "postModificationStepPrincipalInterestPaymentAmount",
        "postModificationStepNumber",
        "postModificationMaximumFutureStepAgreementPercentage",
        "postModificationMaximumStepAgreementRateDate",
        "nonInterestBearingDeferredPrincipalCumulativeAmount",
        "nonInterestBearingDeferredPrincipalReportingPeriodAmount",
        "recoveryDeferredPrincipalReportingPeriodAmount",
        "nonInterestBearingDeferredPaidFullAmount",
        "nonInterestBearingDeferredInterestFeeReportingPeriodAmount",
        "nonInterestBearingDeferredInterestFeeCumulativeAmount",
        "recoveryDeferredInterestFeeReportingPeriodAmount",
        "mostRecentForbearancePlanOrTrialModificationStartDate",
        "mostRecentForbearancePlanOrTrialModificationScheduledEndDate",
        "mostRecentTrialModificationViolatedDate",
        "mostRecentRepaymentPlanStartDate",
        "mostRecentRepaymentPlanScheduledEndDate",
        "mostRecentRepaymentPlanViolatedDate",
        "shortSaleAcceptedOfferAmount",
        "mostRecentLossMitigationExitDate",
        "mostRecentLossMitigationExitCode",
        "attorneyReferralDate",
        "foreclosureDelayReasonCode",
        "foreclosureExitDate",
        "foreclosureExitReasonCode",
        "noticeOfIntentDate",
        "mostRecentAcceptedREOOfferAmount",
        "mostRecentAcceptedREOOfferDate",
        "grossLiquidationProceedsAmount",
        "netSalesProceedsAmount",
        "reportingPeriodLossPassedToIssuingEntityAmount",
        "cumulativeTotalLossPassedToIssuingEntityAmount",
        "subsequentRecoveryAmount",
        "evictionIndicator",
        "reoExitDate",
        "reoExitReasonCode",
        "UPBLiquidationAmount",
        "servicingFeesClaimedAmount",
        "servicerAdvanceReimbursedPrincipalAmount",
        "servicerAdvanceReimbursedInterestAmount",
        "servicerAdvanceReimbursedTaxesInsuranceAmount",
        "servicerAdvanceReimbursedCorporateAmount",
        "REOManagementFeesAmount",
        "cashKeyDeedAmount",
        "performanceIncentiveFeesAmount",
        "mortgageInsuranceClaimFiledDate",
        "mortgageInsuranceClaimAmount",
        "mortgageInsuranceClaimPaidDate",
        "mortgageInsuranceClaimPaidAmount",
        "mortgageInsuranceClaimDeniedRescindedDate",
        "marketableTitleTransferDate",
        "nonPayStatusCode",
        "reportingActionCode",
        "GroupID",
        "reportingPeriodBeginningDate",
        "reportingPeriodEndDate",
        "originationDate",
        "originalTermLoanNumber",
        "maturityDate",
        "interestRateSecuritizationPercentage",
        "interestAccrualMethodCode",
        "firstLoanPaymentDueDate",
        "lienPositionSecuritizationCode",
        "loanStructureCode",
        "paymentTypeCode",
        "periodicPrincipalAndInterestPaymentSecuritizationAmount",
        "scheduledPrincipalBalanceSecuritizationAmount",
        "NumberPropertiesSecuritization",
        "NumberProperties",
        "graceDaysAllowedNumber",
        "interestOnlyIndicator",
        "prepaymentPremiumIndicator",
        "modifiedIndicator",
        "armIndexCode",
        "firstRateAdjustmentDate",
        "firstPaymentAdjustmentDate",
        "armMarginNumber",
        "lifetimeRateCapPercentage",
        "periodicRateIncreaseLimitPercentage",
        "periodicRateDecreaseLimitPercentage",
        "periodicPaymentAdjustmentMaximumAmount",
        "periodicPaymentAdjustmentMaximumPercent",
        "rateResetFrequencyCode",
        "paymentResetFrequencyCode",
        "indexLookbackDaysNumber",
        "prepaymentLockOutEndDate",
        "yieldMaintenanceEndDate",
        "prepaymentPremiumsEndDate",
        "maximumNegativeAmortizationAllowedPercentage",
        "maximumNegativeAmortizationAllowedAmount",
        "negativeAmortizationDeferredInterestCapAmount",
        "deferredInterestCumulativeAmount",
        "deferredInterestCollectedAmount",
        "property",
        "reportPeriodModificationIndicator",
        "reportPeriodBeginningScheduleLoanBalanceAmount",
        "totalScheduledPrincipalInterestDueAmount",
        "servicerTrusteeFeeRatePercentage",
        "unscheduledPrincipalCollectedAmount",
        "reportPeriodEndActualBalanceAmount",
        "reportPeriodEndScheduledLoanBalanceAmount",
        "hyperAmortizingDate",
        "servicingAdvanceMethodCode",
        "nonRecoverabilityIndicator",
        "totalPrincipalInterestAdvancedOutstandingAmount",
        "totalTaxesInsuranceAdvancesOutstandingAmount",
        "otherExpensesAdvancedOutstandingAmount",
        "paymentStatusLoanCode",
        "armIndexRatePercentage",
        "nextInterestRateChangeAdjustmentDate",
        "nextPaymentAdjustmentDate",
        "mostRecentSpecialServicerTransferDate",
        "mostRecentMasterServicerReturnDate",
        "realizedLossToTrustAmount",
        "liquidationPrepaymentCode",
        "liquidationPrepaymentDate",
        "prepaymentPremiumYieldMaintenanceReceivedAmount",
        "workoutStrategyCode",
        "lastModificationDate",
        "modificationCode",
        "postModificationPaymentAmount",
        "postModificationAmortizationPeriodAmount",
        "propertyName",
        "propertyAddress",
        "propertyCity",
        "propertyState",
        "propertyZip",
        "propertyCounty",
        "netRentableSquareFeetNumber",
        "netRentableSquareFeetSecuritizationNumber",
        "unitsBedsRoomsNumber",
        "unitsBedsRoomsSecuritizationNumber",
        "yearBuiltNumber",
        "yearLastRenovated",
        "valuationSecuritizationAmount",
        "valuationSourceSecuritizationCode",
        "valuationSecuritizationDate",
        "mostRecentValuationAmount",
        "mostRecentValuationDate",
        "mostRecentValuationSourceCode",
        "physicalOccupancySecuritizationPercentage",
        "mostRecentPhysicalOccupancyPercentage",
        "propertyStatusCode",
        "defeasanceOptionStartDate",
        "DefeasedStatusCode",
        "largestTenant",
        "squareFeetLargestTenantNumber",
        "leaseExpirationLargestTenantDate",
        "secondLargestTenant",
        "squareFeetSecondLargestTenantNumber",
        "leaseExpirationSecondLargestTenantDate",
        "thirdLargestTenant",
        "squareFeetThirdLargestTenantNumber",
        "leaseExpirationThirdLargestTenantDate",
        "financialsSecuritizationDate",
        "mostRecentFinancialsStartDate",
        "mostRecentFinancialsEndDate",
        "revenueSecuritizationAmount",
        "mostRecentRevenueAmount",
        "operatingExpensesSecuritizationAmount",
        "operatingExpensesAmount",
        "netOperatingIncomeSecuritizationAmount",
        "mostRecentNetOperatingIncomeAmount",
        "netCashFlowFlowSecuritizationAmount",
        "mostRecentNetCashFlowAmount",
        "netOperatingIncomeNetCashFlowSecuritizationCode",
        "netOperatingIncomeNetCashFlowCode",
        "mostRecentDebtServiceAmount",
        "debtServiceCoverageNetOperatingIncomeSecuritizationPercentage",
        "mostRecentDebtServiceCoverageNetOperatingIncomePercentage",
        "debtServiceCoverageNetCashFlowSecuritizationPercentage",
        "mostRecentDebtServiceCoverageNetCashFlowpercentage",
        "debtServiceCoverageSecuritizationCode",
        "mostRecentDebtServiceCoverageCode",
        "mostRecentAnnualLeaseRolloverReviewDate",
        "reportingPeriodEndingDate",
        "originalLoanTerm",
        "loanMaturityDate",
        "interestCalculationTypeCode",
        "originalFirstPaymentDate",
        "gracePeriodNumber",
        "subvented",
        "vehicleManufacturerName",
        "vehicleModelName",
        "vehicleNewUsedCode",
        "vehicleModelYear",
        "vehicleTypeCode",
        "vehicleValueAmount",
        "vehicleValueSourceCode",
        "obligorCreditScoreType",
        "obligorCreditScore",
        "coObligorIndicator",
        "paymentToIncomePercentage",
        "obligorGeographicLocation",
        "reportingPeriodModificationIndicator",
        "nextReportingPeriodPaymentAmountDue",
        "otherServicerFeeRetainedByServicer",
        "otherAssessedUncollectedServicerFeeAmount",
        "reportingPeriodActualEndBalanceAmount",
        "totalActualAmountPaid",
        "servicerAdvancedAmount",
        "currentDelinquencyStatus",
        "chargedoffPrincipalAmount",
        "recoveredAmount",
        "modificationTypeCode",
        "paymentExtendedNumber",
        "repossessedIndicator",
        "repossessedProceedsAmount",
        "reportingPeriodBeginDate",
        "acquisitionCost",
        "originalLeaseTermNumber",
        "scheduledTerminationDate",
        "gracePeriod",
        "baseResidualValue",
        "baseResidualSourceCode",
        "contractResidualValue",
        "lesseeCreditScoreType",
        "lesseeCreditScore",
        "lesseeIncomeVerificationLevelCode",
        "lesseeEmploymentVerificationCode",
        "coLesseePresentIndicator",
        "lesseeGeographicLocation",
        "remainingTermNumber",
        "reportingPeriodSecuritizationValueAmount",
        "securitizationDiscountRate",
        "otherLeaseLevelServicingFeesRetainedAmount",
        "reportingPeriodEndingActualBalanceAmount",
        "reportingPeriodEndActualSecuritizationAmount",
        "primaryLeaseServicerName",
        "DemandResolutionDate",
        "repurchaseOrReplacementReasonCode",
        "chargedOffAmount",
        "leaseExtended",
        "terminationIndicator",
        "excessFeeAmount",
        "liquidationProceedsAmount",
        "commentNumber", "commentColumn", "commentDescription",
        'previousAccessionNumber', 'itemNumber', 'fieldName', 'notes'
      ),
      nameActual = c(
        "priceExerciseConversion",
        "dateDeemedExecution",
        "codeOwnershipDirectIndirect",
        "idDocument",
        "isEquitySwapInvolved",
        "dateExercised",
        "dateExpiration",
        "descriptionFootnote",
        "isDirector",
        "isOfficer",
        "isOther",
        "idCIKIssuer",
        "nameIssuer",
        "idTickerIssuer",
        "isTenPercentOwner",
        "descriptionNatureOfOwnership",
        "isNoSecuritiesOwned",
        "isNotSubjectToSection16",
        "titleOfficer",
        "descriptionOtherText",
        "dateReport",
        "countSharesOwnedPostTransaction",
        "descriptionRemarks",
        "idCIKOwner",
        "cityOwenr",
        "nameOwner",
        "stateOwner",
        "descriptionStateOwner",
        "addressStreet1Owner",
        "addressStreet2Owner",
        "zipcodeOwner",
        "idSchema",
        "titleSecurity",
        "countSharesOwnedPostTransaction",
        "dateSignature",
        "nameSignature",
        "codeTransactionAcquiredDisposed",
        "codeTransaction",
        "dateTransaction",
        "idFormTransaction",
        "pricePerShareTransaction",
        "countSharesTransaction",
        "idCodeTimelinessTransaction",
        "amountTransaction",
        "countSharesUnderlying",
        "titleSecurityUnderlying",
        "descriptionResponse", "isBusinessCombinationTransaction",
        "idCIK", "isMoreThanOneYear", "nameEntityPrevius", "listNameEntityPreviousEDGAR",
        "nameEntity", "typeEntity", "descriptionEntityTypeOther", "idFederalExemptionsExclusions",
        "typeIndustryGroup", "typeInvestmentFund", "descriptionInvestmentFund",
        "hasNonAccreditedInvestors", "countInvestorsNonAccredited",
        "countInvestorsActive", "cityEntity", "stateEntity", "descriptionStateEntity",
        "addressStreet1Entity", "addressStreet2Entity", "zipcodeEntity", "phoneNumberEntity", "listIssuerPreviousName",
        "jurisdictionOfInc", "isOverFiveYearsOld", "hasYearOfInc", "isFormedWithinFiveYears",
        "isYetToBeFormed", "rangeAgregateNetAssetValue", "rangeRevenue",
        "amountInvestmentMinimum", "amountSoldTotal", "amountOfferingTotal",
        "amountRemaining", "nameFirst", "nameLast", "nameMiddle", "relationshipEntity",
        "descriptionRelationship", "amountDollars", "isEstimate", "idCRDBroker",
        "nameBroker", "isForeignSolicitation", "idCRDRecipient",
        "nameRecipient", "stateDescription", "state", "listStatesSolicitation",
        "isAuthorizedRepresentative", "nameSignatory", "titleSignatory",
        "idForm", "codeTestOrLive", "dateFirstSale", "isYetToOccur",
        "isAmendment", "descriptionOtherType", "isDebtType", "isEquityType",
        "isMineralPropertyType", "isOptionToAcquireType", "isOtherType",
        "isPooledInvestmentFundType", "isSecurityToBeAcquiredType", "isTenantInCommonType",
        'isNotSubjectToSection16', 'addressStreet1Owner', 'addressStreet2Owner',
        "isLiveTestFlag", "isConfirmingCopyFlag", "isReturnCopyFlag", "isOverrideInternetFlag",
        "idCCC", "dateReportCalendarOrQuarter", "nameFilingManager", "addressStreet1FilingManager",
        "addressStreet2FilingManager", "cityFilingManager", "stateFilingManager",
        'descriptionStateFilingManager',
        "zipcodeFilingManager", "typeReport", "idSEC",
        "codeProvideInfoForInstruction5", "nameEntity", "titleEntity", "phoneEntity", "signatureEntity",
        "countOtherIncludedManagers", "countTableEntries", "amountValueHoldings",
        "isConfidentialOmitted", "nameIssuer", "classSecurities", "idCUSIP", "valueSecurities", "typeInvestmentDiscretion",
        "descriptionOtherManager", "codePutCall", "countSharesPrincipal", "codeSharesPrincipal", "countSharesVotingSole",
        "countSharesVotingShared", "countSharesVotingNone",

        "idSEC", "isSinceLastFiling", "codeJurisdictionOrganization",
        "yearIncorporation", "idSIC", "idIRS", "countEmployeesFullTime",
        "countEmployeesPartTime", "phoneEntity", "nameConnection", "nameIndustry",
        "amountCashEquivalents", "amountInvestmentSecurities", "amountAccountsReceivable",
        "amountPropertyPlantEquipment", "amountAssetsTotal", "amountAccountsPayable", "amountLongTermDebt",
        "amountLiabilitiesTotal", "amountStockholderEquityTotal", "amountLiabilitiesAndEquityTotal",
        "amountRevenuesTotal", "amountCostAndExpensesOfRevenue", "amountDepreciationAndAmortization",
        "amountNetIncome", "pershareEarningsBasic", "pershareEarningsDiluted",
        "nameAuditor", "nameCommonEquityClass", "amountCommonEquityOutstanding",
        "idCUSIPCommonEquity", "isCommonEquityPublic", "namePreferredEquityClass",
        "amountPreferredEquityOutstanding", "idCusipPreferrdEquity", "isdPreferredEquityPublic",
        "nameDebtSecuritiesClass", "amountOutstandingDebtSecurities", "idCUSIPDebtSecurities",
        "isDebtSecuritiesPublic", "isCertifyIfTrue", "isCertifyIfNotDisqualified",
        "codeTier1Tier2Offering", "codeFinancialStatementAuditStatus", "codeSecuritiesOfferedTypes",
        "codeOfferDelayedContinuous", "codeOfferingYearFlag", "codeOfferingAfterQualifFlag",
        "codeOfferingBestEffortsFlag", "codeSolicitationProposedOfferingFlag",
        "codeResaleSecuritiesAffiliates", "countSecuritiesOffered", "countSecuritiesOutstanding",
        "persharePrice", "amountOfferingIssuer", "amountOfferingExistingShareholdersSelling",
        "amountOfferingSold12MonthQualifiedOffering", "amountOfferingSoldConcurrent",
        "amountOfferingTotal", "nameUnderwritr",
        "amountUnderwritersFees", "nameAuditor", "amountAuditorFees",
        "nameLegal", "amountLegalFees", "namePromoter",
        "amountPromotersFees", "idCRDBroker", "amountOfferringProceedsNet",
        "descriptionResponse", "isJurisdictionsOfSecOfferedSame", "locatonJuridicationSecuritiesOffering",
        "locationDealersJuridicationSecuritiesOffering", "nameSecuritiesIssuer",
        "titleSecuritiesOffered", "amountSecuritiesIssued", "amountSecuritiesPrincipalHolder",
        "amountSecuritiesIssuedTotal", "nameSecuritiesActExemption",
        "isBadActor", "nameSalesCommissionsServiceProvider",
        "amountSalesCommissionsFees", "isJurisdictionsSecuritiesOfferingNone",
        "isUnRegisteredNone",
        "nameBlueSkyServiceProvider", "amountBlueSkyFees",
        'isTier1Tier2Offering', 'idForm', 'idForm', 'amountOfferingConsiderationBasis',
        'nameFindersFeeProvider' , 'amountFindersFee',
        'amountLoans', 'amountPropertyAndEquipment', 'amountDeposits', 'amountInterestIncomeTotal',
        'amountInterestExpenseTotal', 'descriptionOtherSecuritiesOffered',
        'commentFiling',
        "numberAssetType",
        "numberAsset",
        "numberAssetGroup",
        "dateReportPeriodBeginning",
        "dateReportPeriodEnd",
        "nameIssuer",
        "dateOriginalIssuance",
        "amountOriginalSecurity",
        "numberOriginalSecurityTerm",
        "dateSecurityMaturity",
        "numberOriginalAmortizationTerm",
        "percentageOriginalInterestRate",
        "codeAccrualType",
        "codeInterestRateType",
        "numberOriginalInterestOnlyTerm",
        "dateFirstPayment",
        "hasUnderwriting",
        "nameSecurityTitle",
        "numberDenomination",
        "nameCurrency",
        "nameTrustee",
        "numberSecFile",
        "idCIK",
        "hasCallable",
        "codePaymentFrequency",
        "hasZeroCoupon",
        "hasAssetAdded",
        "hasAssetModified",
        "amountReportPeriodBeginningAssetBalance",
        "amountReportPeriodBeginningScheduledAssetBalance",
        "amountReportPeriodScheduledPayment",
        "percentageReportPeriodInterestRate",
        "amountTotalActualPaid",
        "percentageActualInterestCollection",
        "amountActualPrincipalCollected",
        "amountActualOtherCollection",
        "amountOtherPrincipalAdjustment",
        "amountOtherInterestAdjustment",
        "amountScheduledInterest",
        "amountScheduledPrincipal",
        "amountEndReportingPeriodActualBalance",
        "amountEndReportingPeriodScheduledBalance",
        "percentageServicingFee",
        "amountServicingFlatFee",
        "codeZeroBalance",
        "dateZeroBalanceEffective",
        "numberRemainingTermToMaturity",
        "numberCurrentDelinquentStatus",
        "numberPaymentPastDueDays",
        "numberPaymentPastDue",
        "amountNextReportPeriodPaymentDue",
        "dateNextDue",
        "namePrimaryLoanServicer",
        "dateMostRecentServicingTransferReceived",
        "hasAssetSubjectToDemand",
        "codeStatusAssetSubjectToDemand",
        "amountRepurchase",
        "dateDemandResolution",
        "nameRepurchaser",
        "codeRepurchaseReplacementReason",
        "dateReportPeriodBegin",
        "codeOriginalLoanPurpose",
        "nameOriginator",
        "amountOriginalLoan",
        "dateOriginalLoanMaturity",
        "codeOriginalInterestRateType",
        "codeOriginalLienPosition",
        "amountMostRecentJuniorLoanBalance",
        "dateMostRecentJuniorLoanBalance",
        "amountMostRecentSeniorLoan",
        "dateMostRecentSeniorLoanAmount",
        "codeLoanTypeMostSeniorLien",
        "numberMostSeniorLienHybridPeriod",
        "percentageMostSeniorLienNegativeAmortizationLimit",
        "dateMostSeniorLienOrigination",
        "hasPrepaymentPenalty",
        "hasNegativeAmortization",
        "hasModification",
        "numberModification",
        "hasMortgageInsuranceRequirement",
        "hasBalloon",
        "codeCoveredHighCost",
        "codeServicerHazardInsurance",
        "amountRefinanceCashOut",
        "amountTotalOriginationDiscount",
        "hasBroker",
        "codeChannel",
        "numberNationalMortgageLicenseSystemCompany",
        "numberBuyDown",
        "numberLoanDelinquencyAdvance",
        "codeOriginationARMIndex",
        "percentageArmMargin",
        "percentageFullyIndexedRate",
        "numberInitialFixedRatePeriodHybridARM",
        "percentageInitialInterestRateDecrease",
        "percentageInitialInterestRateIncrease",
        "numberIndexLookback",
        "numberSubsequentInterestRateReset",
        "percentageLifetimeRateCeiling",
        "percentageLifetimeRateFloor",
        "percentageSubsequentInterestRateDecrease",
        "percentageSubsequentInterestRateIncrease",
        "numberSubsequentPaymentReset",
        "codeArmRound",
        "percentageArmRound",
        "hasOptionArm",
        "codePaymentMethodAfterRecast",
        "amountInitialMinimumPayment",
        "hasConvertible",
        "hasHELOC",
        "numberHELOCDraw",
        "codePrepaymentPenaltyCalculation",
        "codePrepaymentPenaltyType",
        "numberPrepaymentPenaltyTotalTerm",
        "numberPrepaymentPenaltyHardTerm",
        "amountNegativeAmortizationLimit",
        "numberNegativeAmortizationInitialRecast",
        "numberNegativeAmortizationSubsequentRecast",
        "amountNegativeAmortizationBalance",
        "numberInitialFixedPayment",
        "percentageInitialPaymentCap",
        "percentageSubsequentPaymentCap",
        "numberInitialMinimumPaymentReset",
        "numberSubsequentMinimumPaymentReset",
        "amountMinimumPayment",
        "locationGeographical",
        "codeOccupancyStatus",
        "codeMostRecentOccupancyStatus",
        "codePropertyType",
        "amountMostRecentPropertyValue",
        "codeMostRecentPropertyValueType",
        "dateMostRecentPropertyValue",
        "codeMostRecentAVMModel",
        "numberMostRecentAVMConfidence",
        "percentageOriginalCLTV",
        "percentageOriginalLTV",
        "numberOriginalObligor",
        "numberOriginalObligorCreditScore",
        "typeOriginalObligorCreditScore",
        "numberMostRecentObligorCreditScore",
        "typeMostRecentObligorCreditScore",
        "dateMostRecentObligorCreditScore",
        "codeObligorIncomeVerificationLevel",
        "hasIRSForm4506T",
        "percentageOriginatorFrontEndDTI",
        "percentageOriginatorBackEndDTI",
        "codeObligorEmploymentVerification",
        "codeObligorEmploymentLength",
        "codeObligorAssetVerification",
        "amountOriginalPledgedAssets",
        "codeQualificationMethod",
        "nameMortgageInsuranceCompany",
        "percentageMortgageInsuranceCoverage",
        "namePoolInsuranceCompany",
        "percentagePoolInsuranceStopLoss",
        "codeMortgageInsuranceCoverageType",
        "periodModificationHasReporting",
        "dateNextPaymentDue",
        "codeAdvancingMethod",
        "codeServicingAdvanceMethodology",
        "dateStopPrincipalInterestAdvancing",
        "amountReportingPeriodBeginningLoanBalance",
        "amountReportingPeriodBeginningScheduledLoanBalance",
        "amountNextReportingPeriodPaymentDue",
        "percentageReportingPeriodInterestRate",
        "percentageNextInterestRate",
        "feeamountOtherAssessedUncollectedServicer",
        "amountOtherServicingFeeRetainedByServicer",
        "amountReportingPeriodEndActualBalance",
        "amountReportingPeriodEndScheduledBalance",
        "amountReportingPeriodScheduledPayment",
        "amountActualInterestCollected",
        "amountActualOtherCollected",
        "datePaidThrough",
        "dateInterestPaidThrough",
        "amountPaidFull",
        "amountServicerAdvancedPrincipal",
        "amountServicerAdvancedRepaidPrincipal",
        "amountServicerAdvancedCumulativePrincipal",
        "amountServicerAdvanceInterest",
        "amountServicerAdvanceRepaidInterest",
        "amountServicerAdvanceCumulativeInterest",
        "amountServicerAdvanceTaxesInsurance",
        "amountServicerAdvanceRepaidTaxesInsurance",
        "amountServicerAdvanceCumulativeTaxesInsurance",
        "amountServicerAdvanceCorporate",
        "amountServicerAdvanceRepaidCorporate",
        "amountServicerAdvanceCumulativeCorporate",
        "codeMostRecentTwelveMonthHistory",
        "percentageNextResetRate",
        "dateNextPaymentChange",
        "dateNextInterestRateChange",
        "amountNextResetPayment",
        "hasExercisedArmConversionOption",
        "namePrimaryServicer",
        "nameMasterServicer",
        "nameSpecialServicer",
        "nameSubServicer",
        "hasAssetSubjectDemand",
        "codeAssetSubjectDemandStatus",
        "codeRepurchaseReplacement",
        "amountChargeOffPrincipal",
        "amountChargeOffInterest",
        "codeLossMitigationType",
        "codeMostRecentLoanModificationEvent",
        "dateMostRecentLoanModificationEffective",
        "datePostModificationMaturity",
        "codePostModificationInterestRateType",
        "codePostModificationAmortizationType",
        "percentagePostModificationInterest",
        "datePostModificationFirstPayment",
        "amountPostModificationLoanBalance",
        "amountPostModificationPrincipalInterestPayment",
        "amountTotalCap",
        "modificationIncomeVerificationHasAt",
        "percentageModificationFrontEndDebtToIncome",
        "percentageModificationBackEndDebtToIncome",
        "amountTotalDeferred",
        "amountForgivenPrincipalCumulative",
        "amountForgivenPrincipalReportingPeriod",
        "amountForgivenInterestCumulative",
        "amountForgivenInterestReportingPeriod",
        "amountActualEndingBalanceTotalDebt",
        "amountScheduledEndingBalanceTotalDebt",
        "codePostModificationARM",
        "codePostModificationARMIndex",
        "percentagePostModificationMargin",
        "numberPostModificationInterestReset",
        "datePostModificationNextReset",
        "numberPostModificationIndexLookback",
        "codePostModificationARMRounding",
        "percentagePostModificationARMRounding",
        "paymentPostModificationInitialMinimum",
        "datePostModificationNextPaymentAdjustment",
        "frequencyPostModificationARMPaymentRecast",
        "percentagePostModificationLifetimeFloor",
        "percentagePostModificationLifetimeCeiling",
        "percentagePostModificationInitialInterestRateIncrease",
        "percentagePostModificationInitialInterestRateDecrease",
        "percentagePostModificationSubsequentInterestIncrease",
        "percentagePostModificationSubsequentInterestRateDecrease",
        "percentagePostModificationPaymentCap",
        "codePostModificationPaymentMethodAfterRecast",
        "numberPostModificationARMInterestRateTeaser",
        "numberPostModificationARMPaymentTeaser",
        "hasPostModificationARMNegativeAmortization",
        "percentagePostModificationARMNegativeAmortizationCap",
        "numberPostModificationInterestOnlyTerm",
        "datePostModificationInterestOnlyLastPayment",
        "amountPostModificationBalloon",
        "hasPostModificationInterestRateStep",
        "percentagePostModificationStepInterest",
        "datePostModificationStep",
        "amountPostModificationStepPrincipalInterestPayment",
        "numberPostModificationStep",
        "percentagePostModificationMaximumFutureStepAgreement",
        "datePostModificationMaximumStepAgreementRate",
        "amountNonInterestBearingDeferredPrincipalCumulative",
        "amountNonInterestBearingDeferredPrincipalReportingPeriod",
        "amountRecoveryDeferredPrincipalReportingPeriod",
        "amountNonInterestBearingDeferredPaidFull",
        "amountNonInterestBearingDeferredInterestFeeReportingPeriod",
        "amountNonInterestBearingDeferredInterestFeeCumulative",
        "amountRecoveryDeferredInterestFeeReportingPeriod",
        "dateMostRecentForbearancePlanOrTrialModificationStart",
        "dateMostRecentForbearancePlanOrTrialModificationScheduledEnd",
        "dateMostRecentTrialModificationViolated",
        "dateMostRecentRepaymentPlanStart",
        "dateMostRecentRepaymentPlanScheduledEnd",
        "dateMostRecentRepaymentPlanViolated",
        "amountShortSaleAcceptedOffer",
        "dateMostRecentLossMitigationExit",
        "codeMostRecentLossMitigationExit",
        "dateAttorneyReferral",
        "codeForeclosureDelayReason",
        "dateForeclosureExit",
        "codeForeclosureExitReason",
        "dateNoticeOfIntent",
        "amountMostRecentAcceptedREOOffer",
        "dateMostRecentAcceptedREOOffer",
        "amountGrossLiquidationProceeds",
        "amountNetSalesProceeds",
        "amountReportingPeriodLossPassedToIssuingEntity",
        "amountCumulativeTotalLossPassedToIssuingEntity",
        "amountSubsequentRecovery",
        "hasEviction",
        "dateReoExit",
        "codeReoExitReason",
        "amountUPBLiquidation",
        "amountServicingFeesClaimed",
        "amountServicerAdvanceReimbursedPrincipal",
        "amountServicerAdvanceReimbursedInterest",
        "amountServicerAdvanceReimbursedTaxesInsurance",
        "amountServicerAdvanceReimbursedCorporate",
        "amountREOManagementFees",
        "amountCashKeyDeed",
        "amountPerformanceIncentiveFees",
        "dateMortgageInsuranceClaimFiled",
        "amountMortgageInsuranceClaim",
        "dateMortgageInsuranceClaimPaid",
        "amountMortgageInsuranceClaimPaid",
        "dateMortgageInsuranceClaimDeniedRescinded",
        "dateMarketableTitleTransfer",
        "codeNonPayStatus",
        "codeReportingAction",
        "idGroup",
        "dateReportingPeriodBeginning",
        "dateReportingPeriodEnd",
        "dateOrigination",
        "numberOriginalTermLoan",
        "dateMaturity",
        "percentageInterestRateSecuritization",
        "codeInterestAccrualMethod",
        "dateFirstLoanPaymentDue",
        "codeLienPositionSecuritization",
        "codeLoanStructure",
        "codePaymentType",
        "amountPeriodicPrincipalAndInterestPaymentSecuritization",
        "amountScheduledPrincipalBalanceSecuritization",
        "securitizationNumberProperties",
        "propertiesNumber",
        "numberGraceDaysAllowed",
        "hasInterestOnly",
        "hasPrepaymentPremium",
        "hasModified",
        "codeArmIndex",
        "dateFirstRateAdjustment",
        "dateFirstPaymentAdjustment",
        "numberArmMargin",
        "percentageLifetimeRateCap",
        "percentagePeriodicRateIncreaseLimit",
        "percentagePeriodicRateDecreaseLimit",
        "amountPeriodicPaymentAdjustmentMaximum",
        "percentPeriodicPaymentAdjustmentMaximum",
        "codeRateResetFrequency",
        "codePaymentResetFrequency",
        "numberIndexLookbackDays",
        "datePrepaymentLockOutEnd",
        "dateYieldMaintenanceEnd",
        "datePrepaymentPremiumsEnd",
        "percentageMaximumNegativeAmortizationAllowed",
        "amountMaximumNegativeAmortizationAllowed",
        "amountNegativeAmortizationDeferredInterestCap",
        "amountDeferredInterestCumulative",
        "amountDeferredInterestCollected",
        "propertyProperty",
        "hasReportPeriodModification",
        "amountReportPeriodBeginningScheduleLoanBalance",
        "amountTotalScheduledPrincipalInterestDue",
        "percentageServicerTrusteeFeeRate",
        "amountUnscheduledPrincipalCollected",
        "amountReportPeriodEndActualBalance",
        "amountReportPeriodEndScheduledLoanBalance",
        "dateHyperAmortizing",
        "codeServicingAdvanceMethod",
        "hasNonRecoverability",
        "amountTotalPrincipalInterestAdvancedOutstanding",
        "amountTotalTaxesInsuranceAdvancesOutstanding",
        "amountOtherExpensesAdvancedOutstanding",
        "codePaymentStatusLoan",
        "percentageArmIndexRate",
        "dateNextInterestRateChangeAdjustment",
        "dateNextPaymentAdjustment",
        "dateMostRecentSpecialServicerTransfer",
        "dateMostRecentMasterServicerReturn",
        "amountRealizedLossToTrust",
        "codeLiquidationPrepayment",
        "dateLiquidationPrepayment",
        "amountPrepaymentPremiumYieldMaintenanceReceived",
        "codeWorkoutStrategy",
        "dateLastModification",
        "codeModification",
        "amountPostModificationPayment",
        "amountPostModificationAmortizationPeriod",
        "nameProperty",
        "addressProperty",
        "cityProperty",
        "stateProperty",
        "zipcodeProperty",
        "countyProperty",
        "numberNetRentableSquareFeet",
        "numberNetRentableSquareFeetSecuritization",
        "numberUnitsBedsRooms",
        "numberUnitsBedsRoomsSecuritization",
        "yearBuilt",
        "yearLastRenovated",
        "amountValuationSecuritization",
        "codeValuationSourceSecuritization",
        "dateValuationSecuritization",
        "amountMostRecentValuation",
        "dateMostRecentValuation",
        "codeMostRecentValuationSource",
        "percentagePhysicalOccupancySecuritization",
        "percentageMostRecentPhysicalOccupancy",
        "codePropertyStatus",
        "dateDefeasanceOptionStart",
        "codeDefeasedStatus",
        "tenantLargest",
        "numberSquareFeetLargestTenant",
        "dateLeaseExpirationLargestTenant",
        "tenantSecondLargest",
        "numberSquareFeetSecondLargestTenant",
        "dateLeaseExpirationSecondLargestTenant",
        "tenantThirdLargest",
        "numberSquareFeetThirdLargestTenant",
        "dateLeaseExpirationThirdLargestTenant",
        "dateFinancialsSecuritization",
        "dateMostRecentFinancialsStart",
        "dateMostRecentFinancialsEnd",
        "amountRevenueSecuritization",
        "amountMostRecentRevenue",
        "amountOperatingExpensesSecuritization",
        "amountOperatingExpenses",
        "amountNetOperatingIncomeSecuritization",
        "amountMostRecentNetOperatingIncome",
        "amountNetCashFlowFlowSecuritization",
        "amountMostRecentNetCashFlow",
        "codeNetOperatingIncomeNetCashFlowSecuritization",
        "codeNetOperatingIncomeNetCashFlow",
        "amountMostRecentDebtService",
        "percentageDebtServiceCoverageNetOperatingIncomeSecuritization",
        "percentageMostRecentDebtServiceCoverageNetOperatingIncome",
        "percentageDebtServiceCoverageNetCashFlowSecuritization",
        "percentageMostRecentDebtServiceCoverageNetCash",
        "codeDebtServiceCoverageSecuritization",
        "codeMostRecentDebtServiceCoverage",
        "dateMostRecentAnnualLeaseRolloverReview",
        "dateReportingPeriodEnding",
        "termOriginalLoan",
        "dateLoanMaturity",
        "codeInterestCalculationType",
        "dateOriginalFirstPayment",
        "numberGracePeriod",
        "subventedSubvented",
        "nameVehicleManufacturer",
        "nameVehicleModel",
        "codeVehicleNewUsed",
        "yearVehicleModel",
        "codeVehicleType",
        "amountVehicleValue",
        "codeVehicleValueSource",
        "typeObligorCreditScore",
        "scoreObligorCredit",
        "hasCoObligor",
        "percentagePaymentToIncome",
        "locationObligorGeographic",
        "hasReportingPeriodModification",
        "amountPaymentDueNextReportingPeriod",
        "servicerOtherServicerFeeRetainedBy",
        "amountOtherAssessedUncollectedServicerFee",
        "amountReportingPeriodActualEndBalance",
        "amountPaidTotalActual",
        "amountServicerAdvanced",
        "isDelinquent",
        "amountChargedoffPrincipal",
        "amountRecovered",
        "codeModificationType",
        "numberPaymentExtended",
        "hasRepossessed",
        "amountRepossessedProceeds",
        "dateReportingPeriodBegin",
        "costAcquisition",
        "numberOriginalLeaseTerm",
        "dateScheduledTermination",
        "periodGrace",
        "valueBaseResidual",
        "codeBaseResidualSource",
        "valueContractResidual",
        "typeLesseeCreditScore",
        "scoreLesseeCredit",
        "codeLesseeIncomeVerificationLevel",
        "codeLesseeEmploymentVerification",
        "hasCoLesseePresent",
        "locationLesseeGeographic",
        "numberRemainingTerm",
        "amountReportingPeriodSecuritizationValue",
        "rateSecuritizationDiscount",
        "amountOtherLeaseLevelServicingFeesRetained",
        "amountReportingPeriodEndingActualBalance",
        "amountReportingPeriodEndActualSecuritization",
        "namePrimaryLeaseServicer",
        "dateDemandResolution",
        "codeRepurchaseOrReplacementReason",
        "amountChargedOff",
        "extendedLease",
        "hasTermination",
        "amountExcessFee",
        "amountLiquidationProceeds",
        "detailNumberComment", "columnComment", "descriptionComment",
        'idAccessionPrevious',
        'numberItem', 'nameField', 'descriptionNotes'
      )
    )}

# Lookup table of filer-owner types.
#
# Returns a two-column tibble: `idTypeFilerOwner` (short id) and
# `typeFilerOwner` (display label). Ids and labels are paired by position,
# one pair per line below, and every value is upper-cased by the trailing
# `mutate_all(str_to_upper)`.
.filer_type_df <-
  function() {
    # One row per filer type: id on the left, label on the right. Keeping
    # each pair on a single line prevents the two columns drifting apart.
    filer_types <- matrix(
      c(
        "insider", "Insider",
        "private", "Private Placement",
        "broker_dealer", "Broker Dealer",
        "transfer_agent", "Transfer Agent",
        "ia", "Investment Advisor",
        "msd", "Municipal Securities Dealer",
        "bank", "Bank",
        "inv_co", "Investment Company"
      ),
      ncol = 2,
      byrow = TRUE
    )

    tibble(
      idTypeFilerOwner = filer_types[, 1],
      typeFilerOwner = filer_types[, 2]
    ) %>%
      mutate_all(str_to_upper)
  }

#' Form-D industry dictionary
#'
#' Builds the lookup table of searchable industries for parsed
#' SEC Form-D filings, together with the parent sector of each
#' industry.
#'
#' @return a \code{tibble} with one row per industry and the columns
#'   \code{idIndustry}, \code{nameIndustry}, \code{codeIndustryParent}
#'   and \code{nameIndustryParent}
#' @export
#' @import dplyr
#' @examples
#' dictionary_form_d_categories()
dictionary_form_d_categories <-
  function() {
    # Full label of every parent sector, keyed by its short code.
    parent_labels <- c(
      "OTHER" = "OTHER",
      "TRAVEL" = "TRAVEL AND LEISURE",
      "HEALTH" = "HEALTHCARE",
      "ENERGY" = "ENERGY",
      "REAL" = "REAL ESTATE",
      "FINANCE" = "FINANCIAL",
      "TECH" = "TECHNOLOGY"
    )

    # Industry label (name) -> parent sector code (value). The position in
    # this vector is the industry id. Labels are reproduced exactly as the
    # dictionary has always emitted them.
    industry_parents <- c(
      "AGRICULTURE" = "OTHER",
      "AIRLINES AND AIRPORTS" = "TRAVEL",
      "BIOTECHNOLOGY" = "HEALTH",
      "BUSINESS SERVICES" = "OTHER",
      "COAL MINING" = "ENERGY",
      "COMMERCIAL REAL ESTATE" = "REAL",
      "COMMERCIAL BANKING" = "FINANCE",
      "COMPUTERS" = "TECH",
      "CONSTRUCTION" = "REAL",
      "ELECTRIC UTILITIES" = "ENERGY",
      "ENERGY CONSERVATION" = "ENERGY",
      "ENVIORNMENTAL SERVICES" = "ENERGY",
      "HEALTH INSURANCE" = "HEALTH",
      "HOSPITALS AND PHYSICIANS" = "HEALTH",
      "INSURANCE" = "FINANCE",
      "INVESTING" = "FINANCE",
      "INVESTMENT BANKING" = "FINANCE",
      "LODGING AND CONVETION" = "TRAVEL",
      "MANUFACTURING" = "OTHER",
      "OIL AND GAS" = "ENERGY",
      "OTHER" = "OTHER",
      "OTHER BANKING AND FINANCIAL SERVICES" = "FINANCE",
      "OTHER ENERGY" = "ENERGY",
      "OTHER HEALTH CARE" = "HEALTH",
      "OTHER REAL ESTATE" = "REAL",
      "OTHER TECHNOLOGY" = "TECH",
      "OTHER TRAVEL" = "TRAVEL",
      "PHARMACEUTICALS" = "HEALTH",
      "POOLED INVESTMENT FUND" = "FINANCE",
      "REITS AND FINANCE" = "REAL",
      "RESIDENTIAL REAL ESTATE" = "REAL",
      "RESTAURANTS" = "OTHER",
      "RETAIL" = "OTHER",
      "TELECOMMUNICATIONS" = "TECH",
      "TRAVEL AND TOURISM" = "TRAVEL"
    )

    parent_codes <- unname(industry_parents)

    dplyr::tibble(
      idIndustry = seq_along(industry_parents),
      nameIndustry = names(industry_parents),
      codeIndustryParent = parent_codes,
      # Strip the lookup names so the column is a plain character vector.
      nameIndustryParent = unname(parent_labels[parent_codes])
    )
  }

# Lookup table of insider transaction codes.
#
# Returns a tibble with one row per transaction code:
#   idInsiderTransaction     - the transaction code
#   nameInsiderTransaction   - its label (NA for the "NONE" code)
#   idTypeInsiderTransaction - "A", "D" or NA
.insider_code_df <-
  function() {
    # Laid out row-wise (code, label, type) and read back by column.
    transaction_codes <- matrix(
      c(
        "A", "AWARD", "A",
        "C", "CONVEYANCE", "D",
        "D", "DISPOSITION TO ISSUER", "D",
        "F", "PAYMENT WITH SECURITIES", "D",
        "G", "GIFT", "D",
        "H", "EXPIRATION OF LONG DERIVATIVE POSITION", NA,
        "I", "DISCRETIONARY TRANSACTION", NA,
        "J", "OTHER", NA,
        "K", "EQUITY SWAP OR SIMILAR", NA,
        "L", "SMALL ACQUISITIONS", "A",
        "M", "EXEMPT", "A",
        "NONE", NA, NA,
        "O", "OTM EXERCISE", "A",
        "P", "PURCHASE", "A",
        "S", "SALE", "D",
        "U", "MERGER AND ACQUISITION", NA,
        "V", "REPORTED EARLY", NA,
        "W", "WILL OR LAWS OF DESCENT", "D",
        "X", "ITM OR ATM EXERCISE", "A",
        "Z", "DEPOSIT INTO/WITHDRAWAL FROM VOTING TRUST", "D"
      ),
      ncol = 3,
      byrow = TRUE
    )

    tibble(
      idInsiderTransaction = transaction_codes[, 1],
      nameInsiderTransaction = transaction_codes[, 2],
      idTypeInsiderTransaction = transaction_codes[, 3]
    )
  }

#' SEC filing code dictionary
#'
#' Builds the dictionary that maps SEC form filing type codes
#' to their descriptions.
#'
#' @return a \code{tibble} with the columns \code{idFormType} (the code)
#'   and \code{nameFormType} (the upper-cased description)
#' @export
#' @import dplyr stringr
#' @family SEC
#' @family dictionary
#'
#' @examples
#' dictionary_sec_filing_codes()
dictionary_sec_filing_codes <-
  function() {
    # code (name) -> description (value); descriptions are upper-cased below.
    filing_codes <- c(
      "1.01" = "Entry into a Material Definitive Agreement",
      "1.02" = "Termination of a Material Definitive Agreement",
      "1.03" = "Bankruptcy or Receivership",
      "1.04" = "Mine Safety Reporting of Shutdowns and Patterns of Violations",
      "2.01" = "Completion of Acquisition or Disposition of Assets",
      "2.02" = "Results of Operations and Financial Condition",
      "2.03" = "Creation of a Direct Financial Obligation or an Obligation under an Off-Balance Sheet Arrangement of a Registrant",
      "2.04" = "Triggering Events That Accelerate or Increase a Direct Financial Obligation or an Obligation under an Off-Balance Sheet Arrangement",
      "2.05" = "Costs Associated with Exit or Disposal Activities",
      "2.06" = "Material Impairments",
      "3.01" = "Notice of Delisting or Failure to Satisfy a Continued Listing Rule or Standard; Transfer of Listing",
      "3.02" = "Unregistered Sales of Equity Securities",
      "3.03" = "Material Modification to Rights of Security Holders",
      "4.01" = "Changes in Registrant's Certifying Accountant",
      "4.02" = "Non-Reliance on Previously Issued Financial Statements or a Related Audit Report or Completed Interim Review",
      "5.01" = "Changes in Control of Registrant",
      "5.02" = "Departure of Directors or Certain Officers; Election of Directors; Appointment of Certain Officers; Compensatory Arrangements of Certain Officers",
      "5.03" = "Amendments to Articles of Incorporation or Bylaws; Change in Fiscal Year",
      "5.04" = "Temporary Suspension of Trading Under Registrant's Employee Benefit Plans",
      "5.05" = "Amendments to the Registrant's Code of Ethics, or Waiver of a Provision of the Code of Ethics",
      "5.06" = "Change in Shell Company Status",
      "5.07" = "Submission of Matters to a Vote of Security Holders",
      "5.08" = "Shareholder Director Nominations",
      "6.01" = "ABS Informational and Computational Material",
      "6.02" = "Change of Servicer or Trustee",
      "6.03" = "Change in Credit Enhancement or Other External Support",
      "6.04" = "Failure to Make a Required Distribution",
      "6.05" = "Securities Act Updating Disclosure",
      "7.01" = "Regulation FD Disclosure",
      "8.01" = "Other Events",
      "9.01" = "Financial Statements and Exhibits"
    )

    tibble(
      idFormType = names(filing_codes),
      nameFormType = stringr::str_to_upper(unname(filing_codes))
    )
  }

#' SEC form codes
#'
#' Builds the dictionary that maps SEC form codes to the
#' name of the form category.
#'
#' @return a \code{tibble} with the columns \code{idForm} (the code)
#'   and \code{nameForm} (the upper-cased category name)
#' @export
#' @family SEC
#' @family dictionary
#'
#' @examples
#' dictionary_sec_form_codes()
dictionary_sec_form_codes <-
  function() {
    # code (name) -> category (value); categories are upper-cased below.
    form_codes <- c(
      "R" = "Other Report",
      "A" = "Annual Report",
      "Q" = "Quarterly Report",
      "CR" = "Current Report",
      "REG" = "Registration",
      "REGX" = "Private Offering",
      "O" = "Ownership",
      "P" = "Prospectus",
      "X" = "Exemption",
      "W" = "Withdrawal",
      "SEC" = "SEC Correspondence",
      "PROXY" = "Proxy Statement",
      "CT" = "Confidential Treatment",
      "IS" = "Initial Statement",
      "CO" = "Change in Ownership",
      "T" = "Trades"
    )

    tibble(
      idForm = names(form_codes),
      nameForm = stringr::str_to_upper(unname(form_codes))
    )
  }

# Lookup table of company types.
#
# Returns a tibble pairing each short company-type id (`idCompanyType`)
# with its display name (`nameCompanyType`). Values are returned as
# written here; no case conversion is applied.
.company_type_df <-
  function() {
    # Laid out row-wise (id, name) and read back by column.
    company_types <- matrix(
      c(
        "ic", "Investment Company",
        "i", "Insider",
        "ia", "Investment Adviser",
        "bd", "Broker-dealer",
        "m", "Municipal Securities Dealer",
        "t", "Transfer Agent",
        "b", "Bank",
        "c", "Company",
        "p", "Private Issuer",
        "etf", "ETF",
        "mmf", "Money Market Fund",
        "mf", "Mutual Fund",
        "uit", "UIT",
        "cef", "Closed-end Fund"
      ),
      ncol = 2,
      byrow = TRUE
    )

    tibble(
      idCompanyType = company_types[, 1],
      nameCompanyType = company_types[, 2]
    )
  }

#' SEC Rule dictionary
#'
#' Builds the dictionary that maps SEC rule codes to the
#' name of the rule.
#'
#' @return a \code{tibble} with the columns \code{idRule} and
#'   \code{nameRule}, both upper-cased
#' @export
#' @import dplyr stringr
#'
#' @examples
#' dictionary_sec_rules()
dictionary_sec_rules <-
  function() {
    # Shared label for every "3C*" code (and the bare "3" code).
    section_3c <- "Investment Company Act Section 3c"

    # rule code (name) -> rule name (value), in the dictionary's row order.
    rules <- c(
      "06" = "Rule 506",
      "3C" = section_3c,
      "3C.7" = section_3c,
      "3C.1" = section_3c,
      "06b" = "Rule 506b",
      "04" = "Rule 504",
      "46" = "Rule 506c",
      "04.1" = "Rule 504b(1)(i)",
      "04.2" = "Rule 504b(1)(ii)",
      "04.3" = "Rule 504b(1)(iii)",
      "05" = "Rule 505",
      "3C.6" = section_3c,
      "3C.5" = section_3c,
      "06c" = "Rule 506c",
      "4a5" = "Securities Act Section 4(a)(5)",
      "3C.11" = section_3c,
      "3C.2" = section_3c,
      "3C.3" = section_3c,
      "3C.9" = section_3c,
      "3C.10" = section_3c,
      "3C.4" = section_3c,
      "3C.12" = section_3c,
      "3C." = section_3c,
      "3C.14" = section_3c,
      "3" = section_3c
    )

    rule_df <- tibble(
      idRule = names(rules),
      nameRule = unname(rules)
    )

    # Both the codes and the names are upper-cased (e.g. "06b" -> "06B").
    rule_df %>%
      mutate_all(str_to_upper)
  }



# form_parsing ------------------------------------------------------------


# Break flattened SEC field names into table / item-count / field parts and
# attach the package's own column names.
#
# `sec_names` is a vector of dot-separated field names (the `nameSECFull`
# values). Each name is classified by the chain of early returns below,
# producing a one-row tibble with `nameSECFull`, `nameSEC` and, depending on
# the branch, `nameTable` and `countItem`. The rows are bound together, names
# with an empty `nameSEC` are dropped, and the result is joined to
# `.sec_form_title_df()`.
#
# Stops with a "Missing:" error listing every `nameSEC` that is absent from
# the dictionary returned by `.sec_form_title_df()`.
#
# The last expression is an assignment, so the resulting tibble is returned
# invisibly.
.parse_full_form_names <-
  function(sec_names) {
    df_names <-
      seq_along(sec_names) %>%
      future_map_dfr(function(x) {
        sec_name <-
          sec_names[[x]]

        # Strip the ".value" / ".item" wrappers before splitting on ".".
        name_pieces <-
          sec_name %>% str_replace_all('\\.value|\\.item', '')

        pieces <-
          name_pieces %>%
          str_split('\\.') %>%
          flatten_chr()

        # Pieces without any digit; the last of these is the default field name.
        pieces_no_num <-
          pieces[!pieces %>% str_detect("[0-9]")]
        peice_length <-
          pieces_no_num %>% length()

        is_street <-
          pieces %>% str_detect("street1|street2|Street1|Street2") %>% sum(na.rm = T) > 0

        name_item <-
          pieces_no_num[length(pieces_no_num)]

        # filingManager fields: the field name is every piece pasted together,
        # and no nameTable is assigned.
        if (sec_name %>% str_detect('filingManager')) {
          name_item <-
            pieces %>% paste0(collapse = '')

          df <-
            tibble(nameSECFull = sec_name,
                   nameSEC = name_item)
          return(df)
        }

        # Street pieces contain a digit and were excluded above, so take the
        # matching piece(s) directly.
        if (is_street) {
          name_item <-
            pieces[pieces %>% str_detect("street1|street2|Street1|Street2")]
        }

        is_sig <-
          name_pieces %>% str_detect('signature') & peice_length == 1

        is_footnote <-
          sec_name %>% str_detect('footnote')

        # NOTE(review): the pattern starts with the escape `\i`, not a literal
        # backslash; it looks like "issuer.[A-Z]" was intended -- confirm how
        # the regex engine treats this escape.
        is_issuer <-
          sec_name %>% str_detect('\\issuer.[A-Z]')

        is_federal <-
          sec_name %>% str_detect(pattern = "federalExemptionsExclusions")

        # The order of the checks below matters: the first matching branch
        # returns.
        if (is_federal) {
          df <-
            tibble(
              nameSECFull = sec_name,
              nameTable = pieces[[1]],
              nameSEC = name_item
            )

          return(df)
        }

        if (is_issuer) {

          # Split the original name here (".value"/".item" not removed).
          items <-
            sec_name %>% str_split('\\.') %>% flatten_chr()

          # The item number is parsed from the second piece; parse warnings
          # are suppressed.
          countItem <-
            pieces[2] %>% as.character() %>%  readr::parse_number() %>% suppressWarnings()

          name_item <-
            items[length(items)]

          df <-
            tibble(
              nameSECFull = sec_name,
              nameTable = 'issuer',
              countItem,
              nameSEC = name_item
            )
          return(df)
        }

        if (is_footnote) {
          # A single-piece footnote gets count 0; otherwise the count is
          # parsed from the second piece. The field name is the first piece.
          if (pieces %>% length() == 1) {
            countItem <-
              0
            item <-
              pieces[[1]]
          } else {
            item <-
              pieces[[1]]
            countItem <-
              pieces[2] %>% as.character() %>% readr::parse_number() %>% suppressWarnings()
          }
          return(tibble(nameTable = 'footnotes', nameSECFull = sec_name, nameSEC = item, countItem))
        }

        if (is_sig) {
          df <-
            tibble(nameTable = 'signatures', nameSECFull = sec_name, nameSEC = name_item)
          return(df)
        }

        # Only one non-numeric piece: no table can be derived.
        if (peice_length == 1) {
          df <-
            tibble(nameSECFull = sec_name, nameSEC = name_item)
          return(df)
        }

        piece_count <-
          length(pieces)

        if (piece_count == 1) {
          df <-
            tibble(nameSECFull = sec_name, nameSEC = sec_name)
          return(df)
        }

        # Two pieces: "<table>.<field>".
        if (piece_count == 2 &!is_footnote) {


          df <-
            tibble(nameSECFull = sec_name,
                   nameTable = pieces[[1]] ,
                   nameSEC = name_item)

          return(df)
        }

        # More than two pieces: first piece is the table, the second is parsed
        # as the item number.
        if (piece_count > 2) {
          countItem <-
            pieces[2] %>%
            as.character() %>%
            readr::parse_number() %>% suppressWarnings()

          df <-
            tibble(
              nameSECFull = sec_name,
              nameTable = pieces[[1]] ,
              countItem,
              nameSEC = name_item
            )

          return(df)
        }

      }) %>%
      filter(!nameSEC == '')

    df_dictionary <-
      .sec_form_title_df()

    # Fail loudly when a parsed field name is unknown to the dictionary.
    has_missing_names <-
      df_names$nameSEC[!df_names$nameSEC %in% df_dictionary$nameSEC] %>%
      length() > 0
    if (has_missing_names) {
      missing <-
        df_names$nameSEC[!df_names$nameSEC %in% df_dictionary$nameSEC] %>%
        unique()

      missing_names <-
        missing %>%
        paste0(collapse = '\n')
      stop(list("Missing:\n", missing_names) %>%
             purrr::reduce(paste0))
    }

    # Join on whatever columns the two tables share (no `by` is given).
    df_names <-
      df_names %>%
      left_join(df_dictionary) %>%
      suppressWarnings() %>%
      suppressMessages()

    # No branch above produced a nameTable column: label everything 'asset'.
    if (!'nameTable' %in% names(df_names)) {
      df_names <-
        df_names %>%
        mutate(nameTable = 'asset')
    }

    # `nameActual` is not created above, so it presumably comes from the
    # dictionary join -- verify against .sec_form_title_df().
    # The first "Id" is removed from each table name, and the two listed
    # table names are blanked to NA.
    df_names <-
      df_names %>%
      select(nameTable, nameSECFull, nameSEC, nameActual, everything()) %>%
      mutate(nameTable = nameTable %>% str_replace('Id',''),
             nameTable = ifelse(nameTable %in% c('issuerCredentials','securitiesIssued'), NA, nameTable)) %>%
      suppressWarnings() %>%
      suppressMessages()
  }

# Download one SEC XML document and flatten it into a long name/value table.
#
# `url` is the address of the XML document; it is read with
# `xml2::read_xml()`. Every distinct top-level node name is treated as a
# "table". Each table is converted to a list, round-tripped through JSON to
# flatten it, and gathered into rows of `idTable` (position of the table
# name), `nameSECFull` (flattened field name) and `value` (as character).
# A `comment` table is returned directly as the text of the top-level nodes.
#
# After binding, rows whose value contains "list" are dropped and a set of
# filer/cover-page prefixes is stripped from `nameSECFull`.
#
# Returns the resulting tibble.
.parse_xml_tables <-
  function(url = "https://www.sec.gov/Archives/edgar/data/61004/000114036117000046/doc1.xml"){
    page <-
      url %>%
      xml2::read_xml()

    tables <-
      page %>%
      xml_contents() %>%
      xml_name() %>%
      unique()

    # Tables whose nodes are picked by position or taken wholesale rather
    # than searched for by name.
    tables_special <-
      c('headerData', 'formData', 'infoTable', 'assets')

    data <-
      seq_along(tables) %>%
      future_map_dfr(function(x){
        table <-
          tables[[x]]

        if (table %in% c('headerData', 'formData')) {
          form_tables <-
            page %>% xml_contents() %>% xml_name()

          table_loc <-
            table %>% grep(form_tables)
          xml_nodes <-
            page %>%
            xml_contents() %>% .[[table_loc]]
        }

        if (table %in% c('infoTable' , 'assets')) {
          xml_nodes <-
            page %>%
            xml_contents()
        }

        if (table == 'comment') {
          value <-
            page %>% xml_contents() %>% xml_text()

          df <-
            tibble(idTable = x, nameSECFull = table, value)
          return(df)

        }

        if (!table %in% tables_special) {

          value_search <-
            list('//', table) %>% purrr::reduce(paste0)

          xml_nodes <-
            page %>%
            xml_contents() %>%
            xml_find_all(value_search)
        }
        if (xml_nodes %>% length() > 100) {
          list("Be patient there are ", xml_nodes %>% length() %>% formattable::comma(digits = 0), ' nodes to parse') %>%
            purrr::reduce(paste0) %>% cat(fill = TRUE)
        }
        value_list <-
          xml_nodes %>%
          as_list()

        # Drop empty nodes before the JSON round trip.
        value_list <-
          value_list[value_list %>% future_map(length) %>% flatten_dbl() > 0]

        json_data <-
          value_list %>%
          jsonlite::toJSON(force = FALSE, dataframe = 'values') %>%
          jsonlite::fromJSON(simplifyDataFrame = TRUE, flatten = TRUE)

        # Only a bare array counts as "wrong" output. `class(x) == 'array'`
        # is a length-2 logical for a matrix on R >= 4.0 (class is
        # c("matrix", "array")), which makes `if ()` fail on R >= 4.2.
        # `identical()` always yields one TRUE/FALSE and keeps the pre-4.0
        # outcome: matrices are not rebuilt, plain arrays are.
        wrong_output <-
          identical(class(json_data), "array")

        if (wrong_output) {
          # Rebuild a one-row table straight from the node names and text.
          item <-
            xml_nodes %>% xml_name()
          value <-
            xml_nodes %>% xml_text()
          json_data <-
            tibble(item, value) %>%
            spread(item, value)
        }

        if (json_data %>% length() == 0) {
          return(tibble())
        }
        if ('summaryInfo' %in% names(json_data)) {
          # Remove an empty `clarificationResponses` entry, then keep only the
          # elements that still convert to a data frame with rows.
          json_data <-
            seq_along(json_data) %>% map(
              function(i){
                js_d <- json_data[i]
                if ('summaryInfo' %in% names(js_d)) {
                  if (js_d$summaryInfo$clarificationResponses %>% length() == 0) {
                    js_d$summaryInfo$clarificationResponses <-
                      NULL
                  }
                }
                return(js_d)
              }) %>%
            flatten()

          json_data <-
            json_data[json_data %>% future_map(function(x){data.frame(x, stringsAsFactors = FALSE)} %>% nrow()) > 0]
        }

        json_data <-
          json_data %>%
          data.frame(stringsAsFactors = FALSE) %>%
          as_tibble() %>%
          mutate_all(as.character) %>%
          mutate(idTable = x) %>%
          gather(nameSECFull, value, -idTable) %>%
          arrange(idTable)
        return(json_data)
      })

    # NOTE(review): in the first pattern `\\f` is a regex escape (form feed),
    # so the "\\flags." alternative probably never matches, and the
    # unescaped dots match any character. Left untouched because changing
    # the pattern would change the emitted field names -- confirm intent.
    data <-
      data %>%
      mutate(isList = value %>% str_detect('list')) %>%
      filter(!isList) %>%
      select(-isList) %>%
      mutate(
        nameSECFull = nameSECFull %>% str_replace_all(
          "filerInfo.flags.|filerInfo.filer.|coverPage.|.filer.|\\flags.|filer.credentials.",
          ''
        ),
        nameSECFull = nameSECFull %>% str_replace_all('filerInfo.|issuerCredentials.', '')
      )

    return(data)
  }

# Parse one SEC XML form into a single wide row with nested per-table data.
#
# `url` is the address of the XML document; it is passed to
# `.parse_xml_tables()` and stored in the `urlSECFiling` column.
# `return_message` is accepted but not used anywhere in this body.
#
# The long name/value table is labelled via `.parse_full_form_names()`. Rows
# without a table name become top-level metadata columns; every other table
# is spread wide, nested, and joined on as a list column named
# "data<TableName>".
#
# Returns the joined tibble.
.parse_sec_form <-
  function(url = "https://www.sec.gov/Archives/edgar/data/61004/000114036117000046/doc1.xml",
           return_message = TRUE) {
    data <-
      .parse_xml_tables(url = url)

    if (!'nameSECFull' %in% names(data)) {
      data <-
        data %>%
        mutate(nameSECFull = nameSEC)
    }

    # The CIK is the first path segment after the EDGAR data prefix.
    cik <-
      url %>% str_replace_all('https://www.sec.gov/Archives/edgar/data/', '') %>% str_split('/') %>% flatten_chr() %>% .[[1]] %>% as.character() %>% readr::parse_number() %>% suppressMessages()

    # NOTE(review): df_title is assigned here but never read below.
    df_title <-
      .sec_form_title_df()

    is_13FInfo <-
      url %>% str_detect('form13fInfoTable.xml|infotable.xml')
    sec_names <-
      data$nameSECFull %>% unique()

    df_names <-
      .parse_full_form_names(sec_names = sec_names)

    # Override the derived table name for a handful of known name fragments.
    df_names <-
      df_names %>%
      mutate(nameTable = ifelse(
        nameSECFull %>% str_detect("issuerAddress"),
        "issuerAddress",
        nameTable),
        nameTable =  ifelse(
          nameSECFull %>% str_detect("reportingOwner"),
          "reportingOwner",
          nameTable)
      ) %>%
      mutate(nameTable = ifelse(nameSECFull %>% str_detect("issuerInfo."), 'issuerInfo', nameTable),
             nameTable = ifelse(nameSECFull %>% str_detect("securitiesIssued."), 'securitiesIssued', nameTable),
             nameTable = ifelse(nameSECFull %>% str_detect("summaryInfo."), 'summaryInfo', nameTable),
             nameTable = ifelse(nameSECFull %>% str_detect("^comment[A-Z]"), 'Comments', nameTable)
      )

    # URLs matching the info-table file names put every field in one table.
    if (is_13FInfo) {
      df_names <-
        df_names %>%
        mutate(nameTable = 'holdingsInformation')
    }
    # Guarantee a nameSEC column exists so the select(-nameSEC) below works.
    if (!'nameSEC' %in% names(data)) {
      data <- data %>%
        mutate(nameSEC = nameSECFull)
    }
    # Attach the parsed names (joined on the shared columns) and force the
    # column name for the "X.1.A.A." field.
    data <-
      data %>%
      select(-nameSEC) %>%
      left_join(df_names) %>%
      mutate(nameActual = ifelse(nameSECFull == "X.1.A.A.", 'idForm', nameActual)) %>%
      suppressMessages()

    # Shift the parsed item numbers down by one.
    if ('countItem' %in% names(data)) {
      data <-
        data %>%
        select(nameTable, countItem, nameSECFull, nameActual, everything()) %>%
        mutate(countItem = countItem - 1) %>%
        suppressMessages()
    }

    # When a 'property' table is present, rows without a table are labelled
    # 'Asset'.
    if ('property' %in% data$nameTable) {
      data <-
        data %>%
        mutate(nameTable = ifelse(nameTable %>% is.na(), 'Asset', nameTable))
    }

    # Rows with no table name are treated as top-level metadata.
    has_metadata <-
      data %>%
      filter(nameTable %>% is.na()) %>% nrow() > 0

    if (has_metadata) {
      # Repeated names get a numeric suffix (name, name1, name2, ...) so they
      # stay distinct after spreading; names containing 'idCCC' are dropped.
      df_metadata <-
        data %>%
        filter(nameTable %>% is.na()) %>%
        select(nameActual, value) %>%
        group_by(nameActual) %>%
        mutate(countItem = 1:n() - 1) %>%
        arrange(countItem) %>%
        ungroup() %>%
        filter(!nameActual %>% str_detect('idCCC')) %>%
        mutate(nameActual = ifelse(countItem == 0, nameActual, nameActual %>% paste0(countItem))) %>%
        select(-countItem)

      # Remember the row order so the spread columns can be put back in it.
      col_order <-
        df_metadata$nameActual

      df_metadata <-
        df_metadata %>%
        spread(nameActual, value) %>%
        select(one_of(col_order)) %>%
        mutate(urlSECFiling = url) %>%
        .resolve_form_columns()
    } else {
      # No metadata rows: fall back to the CIK parsed from the URL.
      df_metadata <-
        tibble(idCIKFiler = cik,
               urlSECFiling = url)
    }

    tables <-
      data %>%
      filter(!nameTable %>% is.na()) %>%
      .$nameTable %>%
      unique()

    data <-
      seq_along(tables) %>%
      future_map(function(x) {
        table <-
          tables[[x]]
        # Output column name: "data" + table name with its first letter
        # upper-cased.
        table_name <-
          list('data',
               table %>% substr(1, 1) %>% str_to_upper(),
               table %>% substr(2, nchar(table))) %>%
          purrr::reduce(paste0)

        # Keep only columns that are not entirely NA, then recompute
        # countItem as the 0-based occurrence index of each field name.
        table_df <-
          data %>%
          filter(nameTable == table) %>%
          select(dplyr::matches("countItem"), nameActual, value) %>%
          select(which(colMeans(is.na(.)) < 1)) %>%
          group_by(nameActual) %>%
          mutate(countItem = 1:n() - 1) %>%
          ungroup()

        # TRUE when at least one field name occurs more than once.
        has_counts <-
          table_df$countItem %>% max(na.rm = TRUE) > 0

        # NOTE(review): `nest(-urlSECFiling, .key = data)` below looks like
        # the pre-1.0 tidyr calling convention -- confirm against the tidyr
        # version in use.
        if (has_counts) {
          table_df <-
            table_df %>%
            arrange(countItem)

          col_order <- c('countItem', table_df$nameActual)

          table_df <-
            table_df %>%
            spread(nameActual, value) %>%
            select(one_of(col_order)) %>%
            mutate(urlSECFiling = url) %>%
            .resolve_form_columns()

          table_df <-
            table_df %>%
            nest(-urlSECFiling, .key = data)
        } else {
          # Single occurrence of every field: spread to one row, no countItem.
          table_df <-
            table_df %>%
            select(-countItem)
          col_order <- c(table_df$nameActual)

          table_df <-
            table_df %>%
            spread(nameActual, value) %>%
            select(one_of(col_order)) %>%
            .resolve_form_columns() %>%
            mutate(urlSECFiling = url)

          table_df <-
            table_df %>%
            nest(-urlSECFiling, .key = data)
        }
        # The nested column is the second one; give it the per-table name.
        names(table_df)[[2]] <-
          table_name

        # The value of this assignment is what the mapped function returns:
        # the metadata row with this table's nested column attached.
        df_metadata <-
          df_metadata %>%
          left_join(table_df) %>%
          suppressMessages()

      }) %>%
      reduce(left_join) %>%
      suppressMessages()

    ## maybe add IDCK


    rm(df_metadata)
    return(data)
  }

# Parse every filing selected by `filter_parameter` and join the results
# back to the filing index.
#
# .all_filings     Filing index table. It must contain the column named by
#                  `filter_parameter` plus `urlSECFiling` and `dateFiling`;
#                  the 13F branch also reads `urlSECFilingDirectory`,
#                  `datePeriodReport` and `datetimeAccepted`.
# filter_parameter Filter passed to `filter_()` as a string. The values
#                  'isXBRLInstanceFile', 'is13FFiling', 'isFormD' and
#                  'hasAssetFile' receive special handling.
# return_message   Forwarded to the per-URL parsers.
#
# Returns a tibble of parsed filings, or an empty tibble when nothing is
# selected.
.parse_form_data <-
  function(.all_filings, filter_parameter = 'isXBRLInstanceFile', return_message = TRUE) {
    df_search <-
      .all_filings %>%
      filter_(.dots = filter_parameter)

    if (filter_parameter == 'isXBRLInstanceFile') {
      if (df_search %>% nrow() == 0) {
        return(tibble())
      }
      # XBRL URLs are parsed directly: an error in any single URL propagates
      # to the caller.
      all_data <-
        df_search$urlSECFiling %>%
        unique() %>%
        future_map_dfr(function(x) {
          .parse_xbrl_filer_url(url = x, return_message = return_message)
        })
      all_data <-
        all_data %>%
        select(-dplyr::matches("idCIK1|nameFiler1")) %>%
        left_join(df_search %>% select(dplyr::matches("idForm"), dplyr::matches("idAccession"), dplyr::matches("nameFile"), dateFiling, urlSECFiling)) %>%
        select(
          dplyr::matches("idCIK"),
          dplyr::matches("name[Entity]|name[Filer]"),
          dateFiling,
          dplyr::matches("idForm"),
          dplyr::matches("idAccession"),
          dplyr::matches("nameFile"),
          everything()
        ) %>%
        suppressMessages()

      return(all_data)
    }

    if (filter_parameter == 'is13FFiling') {
      # Nothing selected: return early like the other branches. Without this
      # guard the per-row parsing below fails on an empty table.
      if (df_search %>% nrow() == 0) {
        return(tibble())
      }

      urls_df <-
        df_search %>% select(urlSECFiling, urlSECFilingDirectory)

      # Split each URL into CIK / accession slug / file name; a file name
      # containing "primary" marks the primary document.
      df_13f_urls <-
        seq_len(nrow(urls_df)) %>%
        future_map_dfr(function(x){

          row_df <-
            urls_df %>% slice(x)

          url <- row_df$urlSECFiling
          urlSECFilingDirectory <-
            row_df$urlSECFilingDirectory
          parts <-
            url %>%
            str_replace_all("https://www.sec.gov/Archives/edgar/data/", '') %>%
            str_split('\\/') %>%
            flatten_chr()
          idCIKFiler <-
            parts[[1]] %>% as.numeric()
          slugAccession <-
            parts[[2]]

          isPrimary <-
            parts[[3]] %>% str_detect("primary")

          tibble(idCIKFiler, slugAccession, isPrimary, urlSECFiling = url, urlSECFilingDirectory)
        })

      slugs <-
        df_13f_urls$slugAccession %>% unique()

      df_13fs <-
        seq_along(slugs) %>%
        future_map_dfr(function(x){
          slug <-
            slugs[[x]]
          df_period <-
            df_13f_urls %>%
            filter(slugAccession == slug)
          if (df_period %>% nrow() == 2) {
            # Two documents for the accession: parse the primary document and
            # the other one, then join them.
            primary_url <-
              df_period %>% filter(isPrimary) %>%
              .$urlSECFiling
            df_primary <-
              .parse_sec_form(url = primary_url, return_message = return_message) %>%
              mutate(urlSECFiling = primary_url)

            df_primary <-
              df_primary %>%
              left_join(df_13f_urls) %>%
              suppressWarnings()

            no_primary_url <-
              df_period %>% filter(!isPrimary) %>%
              .$urlSECFiling

            urlSECFilingDirectory <-
              df_period %>% filter(!isPrimary) %>%
              .$urlSECFilingDirectory

            df_primary_no <-
              .parse_sec_form(url = no_primary_url, return_message = return_message) %>%
              mutate(urlSECFiling = no_primary_url)

            data <-
              df_primary %>%
              select(-dplyr::matches("urlSECFiling")) %>%
              left_join(df_primary_no %>% select(-dplyr::matches("urlSECFiling"))) %>%
              mutate(urlSECFilingDirectory = urlSECFilingDirectory) %>%
              suppressMessages()
            return(data)
          } else {
            # The URL lives in `urlSECFiling`; df_period has no `urlFiling`
            # column, so reading that name passed NULL on as the URL.
            period_url <-
              df_period$urlSECFiling
            urlSECFilingDirectory <-
              df_period$urlSECFilingDirectory
            data <-
              .parse_sec_form(url = period_url, return_message = return_message) %>%
              mutate(urlSECFiling = period_url) %>%
              left_join(df_period) %>%
              mutate(urlSECFilingDirectory = urlSECFilingDirectory)
            return(data)
          }
        })

      # NOTE(review): "slugAcession" is misspelled, so this select() removes
      # nothing and `slugAccession` stays in the output. Left as is because
      # fixing it would drop a column callers may read -- confirm intent.
      df_13fs <-
        df_13fs %>%
        left_join(urls_df) %>%
        left_join(df_search %>% select(dateFiling, datePeriodReport, datetimeAccepted, urlSECFilingDirectory, dplyr::matches("urlTextFilingFull"))) %>%
        select(-dplyr::matches("slugAcession")) %>%
        select(dplyr::matches("idCIKFiler"), dplyr::matches("nameFilingManager"), everything()) %>%
        select(dateFiling, everything()) %>%
        suppressMessages()

      return(df_13fs)
    }

    # For Form D, filings whose form id contains "10" are excluded.
    if (filter_parameter == 'isFormD') {
      if ('idForm' %in% names(df_search)){
        df_search <-
          df_search %>%
          filter(!idForm %>% str_detect("10"))
      }
    }
    if (df_search %>% nrow() == 0) {
      return(tibble())
    }

    # Best effort: a URL that fails to parse contributes an empty tibble.
    parse_sec_form_safe <-
      purrr::possibly(.parse_sec_form, tibble())
    all_data <-
      df_search$urlSECFiling %>%
      unique() %>%
      future_map_dfr(function(x) {
        parse_sec_form_safe(url = x, return_message = return_message)
      })

    if (all_data %>% nrow() == 0) {
      return(all_data)
    }

    all_data <-
      all_data %>%
      select(-dplyr::matches("idCIK1|nameFiler1")) %>%
      left_join(df_search %>% select(dplyr::matches("idForm"), dplyr::matches("idAccession"), dplyr::matches("nameFile"), dateFiling, urlSECFiling)) %>%
      select(
        dplyr::matches("idCIK"),
        dplyr::matches("name[Entity]|name[Filer]"),
        dateFiling,
        dplyr::matches("idForm"),
        dplyr::matches("idAccession"),
        dplyr::matches("nameFile"),
        everything()
      ) %>%
      suppressMessages()

    if (filter_parameter == 'hasAssetFile') {
      if('dataComments' %in% names(all_data)) {
        # Pull the non-NULL comment tables aside, keep only rows that carry
        # asset data, then re-attach the comments.
        df_comments <-
          all_data %>%
          select(idCIKFiler, dplyr::matches("idAccession"), dplyr::matches("dataComments")) %>%
          mutate(isNULL = dataComments %>% map_lgl(is_null)) %>%
          filter(!isNULL) %>%
          distinct() %>%
          select(-isNULL)

        all_data <-
          all_data %>%
          select(-dataComments) %>%
          mutate(isNULL = dataAsset %>% map_lgl(is_null)) %>%
          filter(!isNULL) %>%
          filter(!nameFile == "ASSET RELATED DOCUMENT") %>%
          distinct() %>%
          select(-isNULL) %>%
          left_join(df_comments) %>%
          suppressMessages()
      }
    }

    # Drop columns that are NA in every row.
    all_data <-
      all_data %>%
      select(which(colMeans(is.na(.)) < 1))
    return(all_data)
  }


# index_parsing -----------------------------------------------------------
# Fetch SEC filing-index pages and parse each one into a tibble with one row
# per document link found in the page's `#formDiv` table.
#
# Arguments:
#   urls            Index-page URLs. The CIK is taken from the path segment
#                   that follows "data/" in each response URL.
#   return_message  If TRUE, print "Parsing: <url>" for every response.
#
# Returns the tibble accumulated across all successful responses. Requests
# are queued with curl_fetch_multi() and executed by multi_run(); a response
# with status 503 contributes a stub row (idCIK + urlSECFilingDirectory only),
# a page without `.info` nodes contributes nothing, and failed requests are
# ignored.
.parse_sec_filing_index <-
  function(urls, return_message = TRUE) {
    # Accumulator; the success callback appends to it with `<<-`.
    df <-
      tibble()
    success <- function(res) {
      if (return_message) {
        list("Parsing: ", res$url) %>% purrr::reduce(paste0) %>% cat(fill = T)
      }
      # The body is parsed before the status code is inspected below.
      page <-
        res$content %>%
        read_html()

      not_503 <-
        !res$status_code == 503

      # CIK = first path segment after "data/" in the response URL.
      cik <-
        res$url %>%
        str_split('data/') %>%
        flatten_chr() %>%
        .[[2]] %>%
        str_split('/') %>%
        flatten_chr() %>%
        .[[1]] %>%
        as.numeric()
      if (not_503){
        # `.info` nodes hold the metadata values, `.infoHead` their labels.
        values <-
          page %>%
          html_nodes('.info') %>%
          html_text()

        # No metadata: leave the callback without appending anything to `df`.
        if (length(values) == 0) {
          return(tibble())
        }

        items <-
          page %>%
          html_nodes('.infoHead') %>%
          html_text()

        # Translate each label into a column name. Fixed labels are mapped
        # explicitly; a remaining single-word label becomes "item<Label>";
        # a multi-word label becomes <last word, lower-case> followed by the
        # preceding words joined and title-cased (e.g. "Filing Date" ->
        # "dateFiling").
        all_items <-
          items %>%
          map_chr(function(x) {
            is_zero <-
              x %>% str_count('\\ ') == 0

            if (x == 'Accepted') {
              return("datetimeAccepted")
            }

            if (x == 'Documents') {
              return('countDocuments')
            }
            if (x == "items") {
              return('descriptionItems')
            }

            if (is_zero) {
              return('item' %>% paste0(x))
            }

            if (x == "Period of Report") {
              return("datePeriodReport")
            }

            if (x == "429 Reference" | x %>% str_detect("Reference")) {
              return("reference429")
            }

            name_items <-
              x %>% str_split('\\ ') %>%
              flatten_chr()

            # Despite the names, `first` is the LAST word of the label (used
            # as the prefix) and `end` is everything before it.
            first <-
              name_items[name_items %>% length()] %>% str_to_lower()

            end <-
              name_items[1:(name_items %>% length() - 1)] %>%
              paste0(collapse = '') %>%
              str_to_title()

            final_name <-
              list(first, end) %>% purrr::invoke(paste0, .)
            return(final_name)
          })
        search_url <-
          res$url
        # One-row wide table of the page metadata, keyed by the page URL.
        df_metadata <-
          tibble(item = all_items,
                 value = values) %>%
          mutate(urlSECFilingDirectory = search_url) %>%
          spread(item, value)

        # Coerce count* columns to numeric and date* columns to Date.
        # NOTE(review): the last mutate_at() selects '^datetime' columns and
        # then drops every column matching "datetime", so its column set looks
        # always empty and ymd_hms() is never applied (datetimeAccepted stays
        # character) -- confirm whether that is intended.
        df_metadata <-
          df_metadata %>%
          mutate_at(df_metadata %>% select(dplyr::matches('count')) %>% names(),
                    funs(. %>% as.numeric())) %>%
          mutate_at(
            df_metadata %>% select(dplyr::matches('^date[A-Z]')) %>%  select(-dplyr::matches("datetime"))  %>% names(),
            funs(. %>% lubridate::ymd())
          ) %>%
          mutate_at(
            df_metadata %>% select(dplyr::matches('^datetime')) %>%  select(-dplyr::matches("datetime"))  %>% names(),
            funs(. %>% lubridate::ymd_hms())
          )

        # Document links (made absolute), their link text, and the
        # description column of the documents table.
        urlSECFiling <-
          page %>%
          html_nodes('#formDiv a') %>%
          html_attr('href') %>%
          paste0('https://www.sec.gov', .)

        namehref <-
          page %>%
          html_nodes('#formDiv a') %>%
          html_text()

        files <-
          page %>%
          html_nodes('#formDiv td:nth-child(2)') %>%
          html_text() %>%
          str_to_upper()

        wrong_length <-
          !(namehref %>% length() == files %>% length())

        # When links and descriptions do not line up, keep only link texts
        # containing a "." and drop the first URL.
        # NOTE(review): presumably the extra link is a leading non-document
        # anchor -- verify against a real page.
        if (wrong_length) {
          namehref <-
            namehref[namehref %>% str_detect("\\.")]
          urlSECFiling <-
            urlSECFiling[2:length(urlSECFiling)]
        }

        files <-
          files %>%
          str_trim()

        # Fourth table column = document type; blanks become NA and the
        # vector is cut/padded to the number of descriptions.
        types_form <-
          page %>%
          html_nodes('td:nth-child(4)') %>%
          html_text() %>%
          str_trim()

        types_form[types_form == ''] <-
          NA

        types_form <-
          types_form[seq_along(files)]

        files[files == ''] <-
          NA
        search_url <-
          res$url
        # One row per document, plus flags derived from the link text and the
        # document type.
        data <-
          tibble(
            nameFile = files,
            nameHref = namehref,
            typeForm = types_form,
            urlSECFiling
          ) %>%
          mutate(
            isXML = nameHref %>% str_detect("xml"),
            isForm3_4 = nameHref %>% str_detect('doc3.xml|doc4.xml'),
            isFormD = ifelse(
              isXML & typeForm %in% c("D", "D/A"),
              TRUE,
              FALSE
            ),
            is13FFiling = ifelse(
              isXML& typeForm %>% str_detect("13F-HR|INFORMATION TABLE"),
              TRUE,
              FALSE
            ),
            hasSmallOfferingData = ifelse(isXML &
                                            typeForm %>% str_detect("1-A|1-A/A"),
                                          TRUE,
                                          FALSE),
            hasSmallOfferingData = ifelse(typeForm == "C" & isXML, TRUE, hasSmallOfferingData),
            hasAssetFile = typeForm %>% str_detect("EX-102|EX-103")
          ) %>%
          # Split the link text on "." into base name and extension.
          tidyr::separate(nameHref,
                          into = c('nameHREF', 'typeFile'),
                          sep = '\\.') %>%
          mutate(urlSECFilingDirectory = search_url) %>%
          mutate(
            nameFile = ifelse(nameFile == '', NA, nameFile %>% str_to_upper()),
            isCompleteTextFiling = nameFile %>% str_detect("COMPLETE SUBMISSION"),
            isXBRLInstanceFile = ifelse(nameFile %>% str_detect("XBRL INSTANCE"), TRUE, FALSE),
            isImage = ifelse(typeFile %in% c('jpg', 'gif', 'tiff', 'png'), TRUE, FALSE),
            isPDF = ifelse(typeFile %in% c('pdf'), TRUE, FALSE)
          )

        # Attach the page-level metadata to every document row (the join key
        # is whatever columns the two tables share) and put the id, date and
        # count columns first.
        data <-
          data %>%
          left_join(df_metadata) %>%
          mutate(idCIK = cik) %>%
          select(idCIK, dplyr::matches("date"), dplyr::matches("count"), everything()) %>%
          suppressWarnings() %>%
          suppressMessages()
      } else {
        # 503 response: record only which CIK/URL was requested.
        search_url <-
          res$url
        data <-
          tibble(idCIK = cik,
                 urlSECFilingDirectory = search_url)
      }

      df <<-
        df %>%
        bind_rows(data)
    }
    # Failed requests are dropped silently; the returned tibble is unused.
    failure <- function(msg){
      tibble()
    }
    # Queue every URL, then run the queued requests; callbacks fill `df`.
    urls %>%
      walk(function(x){
        curl_fetch_multi(url = x, success, failure)
      })
    multi_run()
    df
  }

.all_filings <-
  function(urls, return_message = TRUE)  {
    # Parse every filing-index URL on its own and row-bind the per-URL
    # results into a single tibble.
    parse_one_index <- function(index_url) {
      .parse_sec_filing_index(urls = index_url, return_message = return_message)
    }

    future_map_dfr(urls, parse_one_index)
  }

.all_filing_urls <-
  function(data, nest_data = TRUE,
           return_message = TRUE) {
    # Given filing rows that carry `urlSECFilingDirectory`, keep one "htm"
    # row per accession, parse each filing directory page, and return the
    # parsed documents (optionally nested per accession in `dataFilings`).
    column_names <- names(data)

    if (!'urlSECFilingDirectory' %in% column_names) {
      stop("urlSECFilingDirectory needs to be in the data fields")
    }

    # Derive the accession id from the directory URL when it is missing:
    # last path segment with the '-index.htm' suffix removed.
    if (!'idAccession' %in% column_names) {
      directory_urls <- unique(data$urlSECFilingDirectory)

      df_accession <-
        future_map_dfr(directory_urls, function(directory_url) {
          path_parts <-
            directory_url %>%
            str_replace_all('https://www.sec.gov/Archives/edgar/data/', '') %>%
            str_split('\\/') %>%
            flatten_chr()

          tibble(
            idAccession = str_replace_all(path_parts[length(path_parts)], '-index.htm', ''),
            urlSECFilingDirectory = directory_url
          )
        })

      data <- suppressMessages(left_join(data, df_accession))
    }

    # Drop the per-document flag columns, keep "htm" documents only, and
    # retain the last such row of every accession.
    data <-
      data %>%
      select(-dplyr::matches("hasAssetFile|isFormD|is13F|isForm3_4|hasSmallOfferingData")) %>%
      filter(typeFile %>% str_detect("htm")) %>%
      group_by(idAccession) %>%
      mutate(countAccension = row_number()) %>%
      filter(countAccension == n()) %>%
      ungroup() %>%
      arrange(dateFiling)

    parsed_filings <-
      .all_filings(urls = data$urlSECFilingDirectory, return_message = return_message)

    accession_keys <-
      select(data, urlSECFilingDirectory, countAccension, idAccession)

    df_all_filings <-
      suppressMessages(left_join(parsed_filings, accession_keys))

    if (!nest_data) {
      return(df_all_filings)
    }

    df_all_filings %>%
      nest(-c(idAccession, countAccension, urlSECFilingDirectory), .key = dataFilings)
  }

# Text Form ---------------------------------------------------------------
.header_names <-
  function() {
    # Lookup table from the labels used in a filing's SEC header
    # (`nameSEC`) to the column names used by this package (`nameActual`).
    # Written as label = name pairs so each mapping reads on one line.
    sec_to_actual <- c(
      "ACCEPTANCE-DATETIME" = "datetimeAccepted",
      "ACCESSION NUMBER" = "idAccession",
      "CONFORMED SUBMISSION TYPE" = "idForm",
      "PUBLIC DOCUMENT COUNT" = "countPublicDocuments",
      "FILED AS OF DATE" = "dateFiling",
      "DATE AS OF CHANGE" = "dateFilingChange",
      "COMPANY CONFORMED NAME" = "nameCompany",
      "CENTRAL INDEX KEY" = "idCIK",
      "STANDARD INDUSTRIAL CLASSIFICATION" = "nameCodeSIC",
      "IRS NUMBER" = "idIRS",
      "STATE OF INCORPORATION" = "stateIncorporation",
      "FISCAL YEAR END" = "monthdayFiscalYearEnd",
      "FORM TYPE" = "typeForm",
      "SEC ACT" = "idSECAct",
      "SEC FILE NUMBER" = "idSEC",
      "FILM NUMBER" = "idFilm",
      "STREET 1" = "addressStreet1",
      "CITY" = "city",
      "STATE" = "state",
      "ZIP" = "zipcode",
      "BUSINESS PHONE" = "telephone",
      "FORMER CONFORMED NAME" = "nameCompanyFormer",
      "DATE OF NAME CHANGE" = "dateNameChange",
      "STREET 2" = "addressStreet2",
      "CONFORMED PERIOD OF REPORT" = "dateReportPeriod",
      "ITEM INFORMATION" = "descriptionItem"
    )

    tibble(nameSEC = names(sec_to_actual),
           nameActual = unname(sec_to_actual))
  }

.section_names <-
  function() {
    # Header section labels and the suffix each contributes to a column
    # name; a missing section and "SUBJECT COMPANY" contribute nothing.
    section_labels <-
      c(NA, "SUBJECT COMPANY", "FILED BY", 'ISSUER', 'REPORTING-OWNER')
    section_suffixes <-
      c('', '', 'FilingEntity', 'Issuer', 'ownerReporting')

    tibble(nameSectionSEC = section_labels,
           nameSectionActual = section_suffixes)
  }

.parent_names <-
  function() {
    # Header sub-group labels and the suffix each contributes to a column
    # name; only the two address groups add a suffix.
    parent_labels <-
      c(NA, "COMPANY DATA", "FILING VALUES", "BUSINESS ADDRESS", "MAIL ADDRESS",
        "FORMER COMPANY")
    parent_suffixes <-
      c('', '', '', 'Business', 'Mailing', '')

    tibble(nameParentSEC = parent_labels,
           nameParentActual = parent_suffixes)
  }

# Parse the <SEC-HEADER> ... </SEC-HEADER> portion of a complete-submission
# text filing into a one-row tibble.
#
# Arguments:
#   text_blob  Character vector holding the lines of the filing.
#
# Returns a single-row tibble whose column names are built from the header
# label (via .header_names(), or derived on the fly for unknown labels), the
# enclosing group (.parent_names()) and the enclosing section
# (.section_names()). Repeated names receive a numeric suffix. Date, datetime,
# count/id and name-like columns are type-converted at the end.
.parse_text_headers <-
  function(text_blob){
  # Lines strictly between the opening and closing header tags.
  header_start <-
    text_blob %>% grep("<SEC-HEADER>",.) + 1

  header_end <-
    text_blob %>% grep("</SEC-HEADER>",.) - 1

  header_text <-
    text_blob[header_start:header_end]

  # Normalise "<TAG>value" lines to "TAG:value" so every line can be split
  # on ":" below.
  header_text <-
    header_text %>% str_replace_all('\\<','') %>% str_replace_all('\\>',':')

  df_headers <- tibble(text = header_text) %>%
    tidyr::separate(col = text, into = c('nameSEC', 'value'), sep = '\\:') %>%
    mutate(value = value %>% str_replace_all("\t", '')) %>%
    mutate(idRow = 1:n())

  # Lines with an empty value are group headings; idRow + 1 re-keys each
  # heading to the line that follows it.
  df_parents <-
    df_headers %>%
    filter(value == '') %>%
    mutate(idRow = idRow + 1) %>%
    dplyr::rename(nameParentSEC = nameSEC) %>%
    select(-value)

  # Section headings are shifted one more row so that they share an idRow
  # with the group heading directly beneath them.
  # NOTE(review): only "SUBJECT COMPANY" and "FILED BY" are treated as
  # sections here although .section_names() also lists ISSUER and
  # REPORTING-OWNER -- confirm.
  df_section <-
    df_parents %>%
    filter(nameParentSEC %in% c("SUBJECT COMPANY", "FILED BY")) %>%
    select(nameSectionSEC = nameParentSEC, idRow) %>%
    mutate(idRow = idRow + 1)

  df_parents <-
    df_parents %>%
    filter(!nameParentSEC %in% c("SUBJECT COMPANY", "FILED BY")) %>%
    left_join(df_section) %>%
    fill(nameSectionSEC) %>%
    select(nameSectionSEC, nameParentSEC, idRow) %>%
    suppressMessages()

  # Value-bearing lines, each tagged with the most recent section and group
  # heading above it (carried downward by fill()).
  data <-
    df_headers %>%
    filter(!value == '') %>%
    left_join(df_parents) %>%
    select(idRow, nameSectionSEC, nameParentSEC, everything()) %>%
    tidyr::fill(nameSectionSEC) %>%
    tidyr::fill(nameParentSEC) %>%
    select(-idRow) %>%
    distinct() %>%
    suppressWarnings() %>%
    suppressMessages()

  # From here on `df_parents` is the static group-name lookup, not the
  # per-filing table built above.
  df_parents <-
    .parent_names()

  df_names <-
    .header_names()
  df_sections <-
    .section_names()

  has_missing_names <-
    data$nameSEC[!data$nameSEC %in% df_names$nameSEC] %>% length() > 0

  # Labels absent from the lookup get a derived name: last word lower-cased
  # (or 'idCIK' when it contains "cik") followed by the preceding words
  # title-cased and concatenated.
  # NOTE(review): for a single-word label `1:(length(parts) - 1)` is `1:0`,
  # which still selects the first element, so the word appears to be used
  # both as prefix and suffix -- confirm.
  if (has_missing_names) {
    df_missing <-
      data$nameSEC[!data$nameSEC %in% df_names$nameSEC] %>% unique() %>%
      future_map_dfr(function(x){
        parts <-
          x %>% str_replace_all('\\-', ' ') %>%
          str_split('\\ ') %>% flatten_chr()


        first <-
          parts[length(parts)] %>%
          str_to_lower()

        is_cik <-
          first %>% str_detect('cik') %>% sum(na.rm = TRUE) > 0

        if (is_cik) {
          first <-
            'idCIK'
        }

        other <-
          list(parts[1:(length(parts) - 1)] %>% str_to_title) %>%
          purrr::reduce(paste0) %>%
          paste0(collapse = '')

        actual <-
          list(first,other) %>%
          purrr::reduce(paste0)

        tibble(nameSEC = x, nameActual = actual)
      })

    df_names <- df_names %>%
      bind_rows(df_missing)
  }

  # Final item name = column name + group suffix + section suffix. The 2nd,
  # 3rd, ... occurrence of a name gets 1, 2, ... appended so names stay
  # unique before spreading.
  data <-
    data %>%
    left_join(df_parents) %>%
    left_join(df_sections) %>%
    left_join(df_names) %>%
    mutate(nameParentActual = ifelse(nameParentActual %>% is.na(), '', nameParentActual)) %>%
    suppressMessages() %>%
    unite(nameItem, nameActual, nameParentActual, nameSectionActual, sep = '') %>%
    select(nameItem, value) %>%
    suppressWarnings() %>%
    group_by(nameItem) %>%
    mutate(countItem = 1:n() - 1) %>%
    ungroup() %>%
    mutate(nameItem = ifelse(countItem == 0, nameItem, paste0(nameItem, countItem))) %>%
    suppressMessages() %>%
    select(-countItem)


  # Remember the header order; spread() would otherwise sort the columns.
  col_order <-
    data$nameItem

  data <-
    data %>%
    spread(nameItem, value) %>%
    select(one_of(col_order))

  # Type conversions, chosen by column-name pattern.
  data <-
    data %>%
    mutate_at(data %>% select(dplyr::matches("datetime")) %>% names(),
              funs(. %>% lubridate::ymd_hms())) %>%
    mutate_at(data %>% select(dplyr::matches("^date[A-Z]")) %>% select(-dplyr::matches("datetime")) %>% names(),
              funs(. %>% lubridate::ymd())) %>%
    mutate_at(data %>% select(dplyr::matches("idCIK|count|monthdayFiscalYearEnd")) %>% names(),
              funs(. %>% as.numeric())) %>%
    mutate_at(data %>% select(dplyr::matches("name[A-Z]|type[A-Z]|description|class")) %>% names(),
              funs(. %>% stringr::str_to_upper()))

  # The SIC value is split on "[" into an industry name and a numeric code.
  if ('nameCodeSIC' %in% names(data)) {
    data <-
      data %>%
      separate(nameCodeSIC, into = c('nameIndustry', 'idSIC'), sep = '\\[') %>%
      mutate(nameIndustry = nameIndustry %>% str_trim() %>% str_to_upper(),
             idSIC = idSIC %>% as.character() %>% readr::parse_number()) %>%
      suppressWarnings()
  }
  return(data)
}

.parse_for_text <-
  function(text_blob) {
    # Return the lines between the first <TEXT> tag and the last </TEXT>
    # tag (both exclusive) as a tibble with a running row id.
    first_row <- grep("<TEXT>", text_blob)[[1]] + 1
    last_row <- max(grep("</TEXT>", text_blob)) - 1

    text_rows <- text_blob[first_row:last_row]

    tibble(idRow = seq_along(text_rows),
           textRow = text_rows)
  }

.parse_text_filing <-
  function(url = "https://www.sec.gov/Archives/edgar/data/732712/000119312517025716/0001193125-17-025716.txt") {
    # Read a complete-submission text filing and return one row holding the
    # parsed header fields, the filing text nested in `textFiling`, the
    # source URL, and flags for the presence of <HTML> / <XML> tags.
    raw_lines <- readr::read_lines(url)
    # Empty lines are dropped before trimming, as whitespace-only lines
    # are not considered empty at that point.
    text_blob <- str_trim(raw_lines[!raw_lines == ''])

    html_tag_found <- sum(str_count(text_blob, "<HTML>"), na.rm = TRUE) > 0
    xml_tag_found <- sum(str_count(text_blob, "<XML>"), na.rm = TRUE) > 0

    df_headers <- .parse_text_headers(text_blob = text_blob)

    # Tag the text rows with the accession id so they can be nested and
    # joined back onto the header row.
    df_text <-
      .parse_for_text(text_blob = text_blob) %>%
      mutate(idAccession = df_headers$idAccession) %>%
      nest(-idAccession, .key = textFiling)

    df_headers %>%
      left_join(df_text) %>%
      mutate(urlSECFiling = url,
             hasHTML = html_tag_found,
             hasXML = xml_tag_found) %>%
      select(dplyr::matches("idCIK"), dplyr::matches("dateFiling"), idAccession, dplyr::matches("idForm"), dplyr::matches("nameCompany"), everything())
  }


.sec_complete_filings <-
  function(urls = c("https://www.sec.gov/Archives/edgar/data/732712/000119312517030264/0001193125-17-030264.txt", "https://www.sec.gov/Archives/edgar/data/732712/000161159317000024/0001611593-17-000024.txt", "https://www.sec.gov/Archives/edgar/data/1629703/000161159317000025/0001611593-17-000025.txt", "https://www.sec.gov/Archives/edgar/data/1284999/000161159317000014/0001611593-17-000014.txt"),
           return_message =  TRUE) {
    # Parse several complete-submission text filings and row-bind the
    # results. Requests are queued with curl_fetch_multi() and executed by
    # multi_run(); each successful response triggers a parse of its URL.

    # Accumulator filled by the success callback through `<<-`.
    df <-
      tibble()

    on_success <- function(res) {
      filing_url <- res$url

      if (return_message) {
        cat(paste0("Parsing: ", filing_url, "\n"), fill = TRUE)
      }

      df <<- bind_rows(df, .parse_text_filing(url = filing_url))
    }

    # Failed requests are ignored; the value returned here is not used.
    on_failure <- function(msg) {
      tibble()
    }

    walk(urls, function(filing_url) {
      curl_fetch_multi(url = filing_url, on_success, on_failure)
    })
    multi_run()

    df
  }


# XBRL Finder -------------------------------------------------------------
# Download an XBRL instance document together with its schema and linkbase
# files and return their contents as nested tibbles.
#
# Arguments:
#   url             URL of the XBRL instance (.xml). The companion files are
#                   located by swapping the trailing ".xml" for ".xsd",
#                   "_cal.xml", "_def.xml", "_lab.xml" and "_pre.xml".
#   return_message  Accepted for signature compatibility; not used here.
#
# Returns a one-row tibble: idCIK, urlSECFiling and list-columns dataFacts,
# dataContexts, dataUnits, dataFootnotes, dataRoles, dataCalculations,
# dataDefinitions and dataLabel.
#
# Errors from downloading or parsing any file other than the calculation
# linkbase propagate to the caller; the calculation linkbase is optional and
# yields an empty tibble when its URL does not answer OK.
.parse_xbrl_filer_url <-
  function(url = "https://www.sec.gov/Archives/edgar/data/1037540/000165642316000023/bxp-20160930.xml",
           return_message = TRUE) {
    # NOTE(review): this changes session-wide options and does not restore
    # them; kept as-is because later code may rely on the values staying set.
    options(stringsAsFactors = FALSE, scipen = 999999)

    # CIK = first path segment after "data/".
    cik <-
      url %>%
      str_split('data/') %>%
      flatten_chr() %>%
      .[[2]] %>%
      str_split('/') %>%
      flatten_chr() %>%
      .[[1]] %>%
      as.numeric()

    # A single temp file is reused for every download and is removed on exit,
    # including when a download or parse step fails.
    tf <-
      tempfile(tmpdir = tempdir(), fileext = ".xml")
    on.exit(unlink(tf), add = TRUE)

    # Companion file URL: replace only the literal, trailing ".xml".
    companion_url <- function(suffix) {
      sub("\\.xml$", suffix, url)
    }

    # Download `remote_url`, parse it, hand the parsed document to `process`,
    # and always release the external memory held by the parsed document.
    with_parsed_doc <- function(remote_url, process) {
      curl::curl_download(remote_url, destfile = tf)
      parsed <- XBRL::xbrlParse(tf)
      on.exit(XBRL::xbrlFree(parsed), add = TRUE)
      process(parsed)
    }

    ## Instance document: facts, contexts, units and footnotes.
    instance <-
      with_parsed_doc(url, function(doc) {
        list(
          facts = XBRL::xbrlProcessFacts(doc) %>% as_tibble(),
          contexts = XBRL::xbrlProcessContexts(doc) %>% as_tibble(),
          units = XBRL::xbrlProcessUnits(doc) %>% as_tibble(),
          footnotes = XBRL::xbrlProcessFootnotes(doc) %>% as_tibble()
        )
      })

    # Flag facts that parse as numbers, keep the parsed amount, and split the
    # element id on "_" into its two leading parts.
    df_fct <-
      instance$facts %>%
      mutate(
        isNumber = !is.na(readr::parse_number(fact)),
        amountFact = ifelse(isNumber == TRUE, fact %>% as.character() %>% readr::parse_number(), NA)
      ) %>%
      separate(elementId,
               c('codeElement', 'nameElement'),
               sep = '\\_',
               remove = FALSE) %>%
      suppressWarnings()

    ## Schema file: roles.
    df_rls <-
      with_parsed_doc(companion_url(".xsd"), function(doc) {
        XBRL::xbrlProcessRoles(doc) %>% as_tibble()
      })

    ## Calculation linkbase (optional).
    url_cal <- companion_url("_cal.xml")
    if (suppressWarnings(httr::url_ok(url_cal))) {
      df_calcs <-
        with_parsed_doc(url_cal, function(doc) {
          XBRL::xbrlProcessArcs(doc, arcType = 'calculation') %>% as_tibble()
        })
    } else {
      df_calcs <-
        tibble()
    }

    ## Definition linkbase.
    df_defs <-
      with_parsed_doc(companion_url("_def.xml"), function(doc) {
        XBRL::xbrlProcessArcs(doc, arcType = 'definition') %>% as_tibble()
      })

    ## Label linkbase.
    df_labels <-
      with_parsed_doc(companion_url("_lab.xml"), function(doc) {
        XBRL::xbrlProcessLabels(doc) %>% as_tibble()
      })

    ## Presentation linkbase: still downloaded and parsed so that a missing or
    ## malformed file raises exactly as before; its arcs are not returned.
    with_parsed_doc(companion_url("_pre.xml"), function(doc) NULL)

    tibble(
      idCIK = cik,
      urlSECFiling = url,
      dataFacts = list(df_fct),
      dataContexts = list(instance$contexts),
      dataUnits = list(instance$units),
      dataFootnotes = list(instance$footnotes),
      dataRoles = list(df_rls),
      dataCalculations = list(df_calcs),
      dataDefinitions = list(df_defs),
      dataLabel = list(df_labels)
    )
  }


# dictionaries ------------------------------------------------------------
.sec_form_title_df <-
  function() {
    tibble(
      nameSEC = c(
        "conversionOrExercisePrice",
        "deemedExecutionDate",
        "directOrIndirectOwnership",
        "documentType",
        "equitySwapInvolved",
        "exerciseDate",
        "expirationDate",
        "footnote",
        "isDirector",
        "isOfficer",
        "isOther",
        "issuerCik",
        "issuerName",
        "issuerTradingSymbol",
        "isTenPercentOwner",
        "natureOfOwnership",
        "noSecuritiesOwned",
        "notSubjectToSection16",
        "officerTitle",
        "otherText",
        "periodOfReport",
        "postTransactionAmountsOwnedFollowingTransaction",
        "remarks",
        "rptOwnerCik",
        "rptOwnerCity",
        "rptOwnerName",
        "rptOwnerState",
        "rptOwnerStateDescription",
        "rptOwnerStreet1",
        "rptOwnerStreet2",
        "rptOwnerZipCode",
        "schemaVersion",
        "securityTitle",
        "sharesOwnedFollowingTransaction",
        "signatureDate",
        "signatureName",
        "transactionAcquiredDisposedCode",
        "transactionCode",
        "transactionDate",
        "transactionFormType",
        "transactionPricePerShare",
        "transactionShares",
        "transactionTimeliness",
        "transactionTotalValue",
        "underlyingSecurityShares",
        "underlyingSecurityTitle",
        "clarificationOfResponse", "isBusinessCombinationTransaction",
        "cik", "moreThanOneYear", "previousName", "edgarPreviousNameList",
        "entityName", "entityType", "entityTypeOtherDesc", "federalExemptionsExclusions",
        "industryGroupType", "investmentFundType", "investmentFundInfo",
        "hasNonAccreditedInvestors", "numberNonAccreditedInvestors",
        "totalNumberAlreadyInvested", "city", "stateOrCountry", "stateOrCountryDescription",
        "street1", "street2", "zipCode", "issuerPhoneNumber", "issuerPreviousNameList",
        "jurisdictionOfInc", "overFiveYears", "yearOfInc", "withinFiveYears",
        "yetToBeFormed", "aggregateNetAssetValueRange", "revenueRange",
        "minimumInvestmentAccepted", "totalAmountSold", "totalOfferingAmount",
        "totalRemaining", "firstName", "lastName", "middleName", "relationship",
        "relationshipClarification", "dollarAmount", "isEstimate", "associatedBDCRDNumber",
        "associatedBDName", "foreignSolicitation", "recipientCRDNumber",
        "recipientName", "description", "state", "statesOfSolicitationList",
        "authorizedRepresentative", "nameOfSigner", "signatureTitle",
        "submissionType", "testOrLive", "dateOfFirstSale", "yetToOccur",
        "isAmendment", "descriptionOfOtherType", "isDebtType", "isEquityType",
        "isMineralPropertyType", "isOptionToAcquireType", "isOtherType",
        "isPooledInvestmentFundType", "isSecurityToBeAcquiredType", "isTenantInCommonType",
        'notSubjectToSection16', 'rptOwnerStreet1', 'rptOwnerStreet2',

        "liveTestFlag", "confirmingCopyFlag", "returnCopyFlag", "overrideInternetFlag",
        "ccc", "reportCalendarOrQuarter", "filingManagername", "filingManageraddressstreet1",
        "filingManageraddressstreet2", "filingManageraddresscity", "filingManageraddressstateOrCountry",
        'filingManagerstateOrCountryDescription',
        "filingManageraddresszipCode", "reportType", "form13FFileNumber",
        "provideInfoForInstruction5", "name", "title", "phone", "signature",
        "otherIncludedManagersCount", "tableEntryTotal", "tableValueTotal",
        "isConfidentialOmitted",
        "nameOfIssuer", "titleOfClass", "cusip", "value", "investmentDiscretion",
        "otherManager", "putCall", "sshPrnamt", "sshPrnamtType", "Sole",
        "Shared", "None",

        "offeringFileNumber", "sinceLastFiling", "jurisdictionOrganization",
        "yearIncorporation", "sicCode", "irsNum", "fullTimeEmployees",
        "partTimeEmployees", "phoneNumber", "connectionName", "industryGroup",
        "cashEquivalents", "investmentSecurities", "accountsReceivable",
        "propertyPlantEquipment", "totalAssets", "accountsPayable", "longTermDebt",
        "totalLiabilities", "totalStockholderEquity", "totalLiabilitiesAndEquity",
        "totalRevenues", "costAndExpensesApplToRevenues", "depreciationAndAmortization",
        "netIncome", "earningsPerShareBasic", "earningsPerShareDiluted",
        "nameAuditor", "commonEquityClassName", "outstandingCommonEquity",
        "commonCusipEquity", "publiclyTradedCommonEquity", "preferredEquityClassName",
        "outstandingPreferredEquity", "preferredCusipEquity", "publiclyTradedPreferredEquity",
        "debtSecuritiesClassName", "outstandingDebtSecurities", "cusipDebtSecurities",
        "publiclyTradedDebtSecurities", "certifyIfTrue", "certifyIfNotDisqualified",
        "summaryInfo", "financialStatementAuditStatus", "securitiesOfferedTypes",
        "offerDelayedContinuousFlag", "offeringYearFlag", "offeringAfterQualifFlag",
        "offeringBestEffortsFlag", "solicitationProposedOfferingFlag",
        "resaleSecuritiesAffiliatesFlag", "securitiesOffered", "outstandingSecurities",
        "pricePerSecurity", "issuerAggregateOffering", "securityHolderAggegate",
        "qualificationOfferingAggregate", "concurrentOfferingAggregate",
        "totalAggregateOffering", "underwritersServiceProviderName",
        "underwritersFees", "auditorServiceProviderName", "auditorFees",
        "legalServiceProviderName", "legalFees", "promotersServiceProviderName",
        "promotersFees", "brokerDealerCrdNumber", "estimatedNetAmount",
        "clarificationResponses", "jurisdictionsOfSecOfferedSame", "issueJuridicationSecuritiesOffering",
        "dealersJuridicationSecuritiesOffering", "securitiesIssuerName",
        "securitiesIssuerTitle", "securitiesIssuedTotalAmount", "securitiesPrincipalHolderAmount",
        "securitiesIssuedAggregateAmount", "securitiesActExcemption",
        "certifyIfBadActor", "salesCommissionsServiceProviderName",
        "salesCommissionsServiceProviderFees", "jurisdictionsOfSecOfferedNone",
        "ifUnregsiteredNone", "blueSkyServiceProviderName", "blueSkyFees",
        'indicateTier1Tier2Offering', 'X.1.A.A.', 'X.1.A.A.', 'aggregateConsiderationBasis',
        'findersFeesServiceProviderName' , 'finderFeesFee',
        'loans', 'propertyAndEquipment', 'deposits', 'totalInterestIncome',
        'totalInterestExpenses', 'securitiesOfferedOtherDesc', 'comment',
        "assetTypeNumber",
        "assetNumber",
        "assetGroupNumber",
        "reportPeriodBeginningDate",
        "reportPeriodEndDate",
        "issuerName",
        "originalIssuanceDate",
        "originalSecurityAmount",
        "originalSecurityTermNumber",
        "securityMaturityDate",
        "originalAmortizationTermNumber",
        "originalInterestRatePercentage",
        "accrualTypeCode",
        "interestRateTypeCode",
        "originalInterestOnlyTermNumber",
        "firstPaymentDate",
        "underwritingIndicator",
        "securityTitleName",
        "denominationNumber",
        "currencyName",
        "trusteeName",
        "secFileNumber",
        "cik",
        "callableIndicator",
        "paymentFrequencyCode",
        "zeroCouponIndicator",
        "assetAddedIndicator",
        "assetModifiedIndicator",
        "reportPeriodBeginningAssetBalanceAmount",
        "reportPeriodBeginningScheduledAssetBalanceAmount",
        "reportPeriodScheduledPaymentAmount",
        "reportPeriodInterestRatePercentage",
        "totalActualPaidAmount",
        "actualInterestCollectionPercentage",
        "actualPrincipalCollectedAmount",
        "actualOtherCollectionAmount",
        "otherPrincipalAdjustmentAmount",
        "otherInterestAdjustmentAmount",
        "scheduledInterestAmount",
        "scheduledPrincipalAmount",
        "endReportingPeriodActualBalanceAmount",
        "endReportingPeriodScheduledBalanceAmount",
        "servicingFeePercentage",
        "servicingFlatFeeAmount",
        "zeroBalanceCode",
        "zeroBalanceEffectiveDate",
        "remainingTermToMaturityNumber",
        "currentDelinquentStatusNumber",
        "paymentPastDueDaysNumber",
        "paymentPastDueNumber",
        "nextReportPeriodPaymentDueAmount",
        "nextDueDate",
        "primaryLoanServicerName",
        "mostRecentServicingTransferReceivedDate",
        "assetSubjectToDemandIndicator",
        "statusAssetSubjectToDemandCode",
        "repurchaseAmount",
        "demandResolutionDate",
        "repurchaserName",
        "repurchaseReplacementReasonCode",
        "reportPeriodBeginDate",
        "originalLoanPurposeCode",
        "originatorName",
        "originalLoanAmount",
        "originalLoanMaturityDate",
        "originalInterestRateTypeCode",
        "originalLienPositionCode",
        "mostRecentJuniorLoanBalanceAmount",
        "mostRecentJuniorLoanBalanceDate",
        "mostRecentSeniorLoanAmount",
        "mostRecentSeniorLoanAmountDate",
        "loanTypeMostSeniorLienCode",
        "mostSeniorLienHybridPeriodNumber",
        "mostSeniorLienNegativeAmortizationLimitPercentage",
        "mostSeniorLienOriginationDate",
        "prepaymentPenaltyIndicator",
        "negativeAmortizationIndicator",
        "modificationIndicator",
        "modificationNumber",
        "mortgageInsuranceRequirementIndicator",
        "balloonIndicator",
        "coveredHighCostCode",
        "servicerHazardInsuranceCode",
        "refinanceCashOutAmount",
        "totalOriginationDiscountAmount",
        "brokerIndicator",
        "channelCode",
        "nationalMortgageLicenseSystemCompanyNumber",
        "buyDownNumber",
        "loanDelinquencyAdvanceNumber",
        "originationARMIndexCode",
        "armMarginPercentage",
        "fullyIndexedRatePercentage",
        "initialFixedRatePeriodHybridARMNumber",
        "initialInterestRateDecreasePercentage",
        "initialInterestRateIncreasePercentage",
        "indexLookbackNumber",
        "subsequentInterestRateResetNumber",
        "lifetimeRateCeilingPercentage",
        "lifetimeRateFloorPercentage",
        "subsequentInterestRateDecreasePercentage",
        "subsequentInterestRateIncreasePercentage",
        "subsequentPaymentResetNumber",
        "armRoundCode",
        "armRoundPercentage",
        "optionArmIndicator",
        "paymentMethodAfterRecastCode",
        "initialMinimumPaymentAmount",
        "convertibleIndicator",
        "HELOCIndicator",
        "HELOCDrawNumber",
        "prepaymentPenaltyCalculationCode",
        "prepaymentPenaltyTypeCode",
        "prepaymentPenaltyTotalTermNumber",
        "prepaymentPenaltyHardTermNumber",
        "negativeAmortizationLimitAmount",
        "negativeAmortizationInitialRecastNumber",
        "negativeAmortizationSubsequentRecastNumber",
        "negativeAmortizationBalanceAmount",
        "initialFixedPaymentNumber",
        "initialPaymentCapPercentage",
        "subsequentPaymentCapPercentage",
        "initialMinimumPaymentResetNumber",
        "subsequentMinimumPaymentResetNumber",
        "minimumPaymentAmount",
        "geographicalLocation",
        "occupancyStatusCode",
        "mostRecentOccupancyStatusCode",
        "propertyTypeCode",
        "mostRecentPropertyValueAmount",
        "mostRecentPropertyValueTypeCode",
        "mostRecentPropertyValueDate",
        "mostRecentAVMModelCode",
        "mostRecentAVMConfidenceNumber",
        "originalCLTVPercentage",
        "originalLTVPercentage",
        "originalObligorNumber",
        "originalObligorCreditScoreNumber",
        "originalObligorCreditScoreType",
        "mostRecentObligorCreditScoreNumber",
        "mostRecentObligorCreditScoreType",
        "mostRecentObligorCreditScoreDate",
        "obligorIncomeVerificationLevelCode",
        "IRSForm4506TIndicator",
        "originatorFrontEndDTIPercentage",
        "originatorBackEndDTIPercentage",
        "obligorEmploymentVerificationCode",
        "obligorEmploymentLengthCode",
        "obligorAssetVerificationCode",
        "originalPledgedAssetsAmount",
        "qualificationMethodCode",
        "mortgageInsuranceCompanyName",
        "mortgageInsuranceCoveragePercentage",
        "poolInsuranceCompanyName",
        "poolInsuranceStopLossPercentage",
        "mortgageInsuranceCoverageTypeCode",
        "modificationIndicatorReportingPeriod",
        "nextPaymentDueDate",
        "advancingMethodCode",
        "servicingAdvanceMethodologyCode",
        "stopPrincipalInterestAdvancingDate",
        "reportingPeriodBeginningLoanBalanceAmount",
        "reportingPeriodBeginningScheduledLoanBalanceAmount",
        "nextReportingPeriodPaymentDueAmount",
        "reportingPeriodInterestRatePercentage",
        "nextInterestRatePercentage",
        "otherAssessedUncollectedServicerFeeamount",
        "otherServicingFeeRetainedByServicerAmount",
        "reportingPeriodEndActualBalanceAmount",
        "reportingPeriodEndScheduledBalanceAmount",
        "reportingPeriodScheduledPaymentAmount",
        "actualInterestCollectedAmount",
        "actualOtherCollectedAmount",
        "paidThroughDate",
        "interestPaidThroughDate",
        "paidFullAmount",
        "servicerAdvancedPrincipalAmount",
        "servicerAdvancedRepaidPrincipalAmount",
        "servicerAdvancedCumulativePrincipalAmount",
        "servicerAdvanceInterestAmount",
        "servicerAdvanceRepaidInterestAmount",
        "servicerAdvanceCumulativeInterestAmount",
        "servicerAdvanceTaxesInsuranceAmount",
        "servicerAdvanceRepaidTaxesInsuranceAmount",
        "servicerAdvanceCumulativeTaxesInsuranceAmount",
        "servicerAdvanceCorporateAmount",
        "servicerAdvanceRepaidCorporateAmount",
        "servicerAdvanceCumulativeCorporateAmount",
        "mostRecentTwelveMonthHistoryCode",
        "nextResetRatePercentage",
        "nextPaymentChangeDate",
        "nextInterestRateChangeDate",
        "nextResetPaymentAmount",
        "exercisedArmConversionOptionIndicator",
        "primaryServicerName",
        "masterServicerName",
        "specialServicerName",
        "subServicerName",
        "assetSubjectDemandIndicator",
        "assetSubjectDemandStatusCode",
        "repurchaseReplacementCode",
        "chargeOffPrincipalAmount",
        "chargeOffInterestAmount",
        "lossMitigationTypeCode",
        "mostRecentLoanModificationEventCode",
        "mostRecentLoanModificationEffectiveDate",
        "postModificationMaturityDate",
        "postModificationInterestRateTypeCode",
        "postModificationAmortizationTypeCode",
        "postModificationInterestPercentage",
        "postModificationFirstPaymentDate",
        "postModificationLoanBalanceAmount",
        "postModificationPrincipalInterestPaymentAmount",
        "totalCapAmount",
        "incomeVerificationIndicatorAtModification",
        "modificationFrontEndDebtToIncomePercentage",
        "modificationBackEndDebtToIncomePercentage",
        "totalDeferredAmount",
        "forgivenPrincipalCumulativeAmount",
        "forgivenPrincipalReportingPeriodAmount",
        "forgivenInterestCumulativeAmount",
        "forgivenInterestReportingPeriodAmount",
        "actualEndingBalanceTotalDebtAmount",
        "scheduledEndingBalanceTotalDebtAmount",
        "postModificationARMCode",
        "postModificationARMIndexCode",
        "postModificationMarginPercentage",
        "postModificationInterestResetNumber",
        "postModificationNextResetDate",
        "postModificationIndexLookbackNumber",
        "postModificationARMRoundingCode",
        "postModificationARMRoundingPercentage",
        "postModificationInitialMinimumPayment",
        "postModificationNextPaymentAdjustmentDate",
        "postModificationARMPaymentRecastFrequency",
        "postModificationLifetimeFloorPercentage",
        "postModificationLifetimeCeilingPercentage",
        "postModificationInitialInterestRateIncreasePercentage",
        "postModificationInitialInterestRateDecreasePercentage",
        "postModificationSubsequentInterestIncreasePercentage",
        "postModificationSubsequentInterestRateDecreasePercentage",
        "postModificationPaymentCapPercentage",
        "postModificationPaymentMethodAfterRecastCode",
        "postModificationARMInterestRateTeaserNumber",
        "postModificationARMPaymentTeaserNumber",
        "postModificationARMNegativeAmortizationIndicator",
        "postModificationARMNegativeAmortizationCapPercentage",
        "postModificationInterestOnlyTermNumber",
        "postModificationInterestOnlyLastPaymentDate",
        "postModificationBalloonAmount",
        "postModificationInterestRateStepIndicator",
        "postModificationStepInterestPercentage",
        "postModificationStepDate",
        "postModificationStepPrincipalInterestPaymentAmount",
        "postModificationStepNumber",
        "postModificationMaximumFutureStepAgreementPercentage",
        "postModificationMaximumStepAgreementRateDate",
        "nonInterestBearingDeferredPrincipalCumulativeAmount",
        "nonInterestBearingDeferredPrincipalReportingPeriodAmount",
        "recoveryDeferredPrincipalReportingPeriodAmount",
        "nonInterestBearingDeferredPaidFullAmount",
        "nonInterestBearingDeferredInterestFeeReportingPeriodAmount",
        "nonInterestBearingDeferredInterestFeeCumulativeAmount",
        "recoveryDeferredInterestFeeReportingPeriodAmount",
        "mostRecentForbearancePlanOrTrialModificationStartDate",
        "mostRecentForbearancePlanOrTrialModificationScheduledEndDate",
        "mostRecentTrialModificationViolatedDate",
        "mostRecentRepaymentPlanStartDate",
        "mostRecentRepaymentPlanScheduledEndDate",
        "mostRecentRepaymentPlanViolatedDate",
        "shortSaleAcceptedOfferAmount",
        "mostRecentLossMitigationExitDate",
        "mostRecentLossMitigationExitCode",
        "attorneyReferralDate",
        "foreclosureDelayReasonCode",
        "foreclosureExitDate",
        "foreclosureExitReasonCode",
        "noticeOfIntentDate",
        "mostRecentAcceptedREOOfferAmount",
        "mostRecentAcceptedREOOfferDate",
        "grossLiquidationProceedsAmount",
        "netSalesProceedsAmount",
        "reportingPeriodLossPassedToIssuingEntityAmount",
        "cumulativeTotalLossPassedToIssuingEntityAmount",
        "subsequentRecoveryAmount",
        "evictionIndicator",
        "reoExitDate",
        "reoExitReasonCode",
        "UPBLiquidationAmount",
        "servicingFeesClaimedAmount",
        "servicerAdvanceReimbursedPrincipalAmount",
        "servicerAdvanceReimbursedInterestAmount",
        "servicerAdvanceReimbursedTaxesInsuranceAmount",
        "servicerAdvanceReimbursedCorporateAmount",
        "REOManagementFeesAmount",
        "cashKeyDeedAmount",
        "performanceIncentiveFeesAmount",
        "mortgageInsuranceClaimFiledDate",
        "mortgageInsuranceClaimAmount",
        "mortgageInsuranceClaimPaidDate",
        "mortgageInsuranceClaimPaidAmount",
        "mortgageInsuranceClaimDeniedRescindedDate",
        "marketableTitleTransferDate",
        "nonPayStatusCode",
        "reportingActionCode",
        "GroupID",
        "reportingPeriodBeginningDate",
        "reportingPeriodEndDate",
        "originationDate",
        "originalTermLoanNumber",
        "maturityDate",
        "interestRateSecuritizationPercentage",
        "interestAccrualMethodCode",
        "firstLoanPaymentDueDate",
        "lienPositionSecuritizationCode",
        "loanStructureCode",
        "paymentTypeCode",
        "periodicPrincipalAndInterestPaymentSecuritizationAmount",
        "scheduledPrincipalBalanceSecuritizationAmount",
        "NumberPropertiesSecuritization",
        "NumberProperties",
        "graceDaysAllowedNumber",
        "interestOnlyIndicator",
        "prepaymentPremiumIndicator",
        "modifiedIndicator",
        "armIndexCode",
        "firstRateAdjustmentDate",
        "firstPaymentAdjustmentDate",
        "armMarginNumber",
        "lifetimeRateCapPercentage",
        "periodicRateIncreaseLimitPercentage",
        "periodicRateDecreaseLimitPercentage",
        "periodicPaymentAdjustmentMaximumAmount",
        "periodicPaymentAdjustmentMaximumPercent",
        "rateResetFrequencyCode",
        "paymentResetFrequencyCode",
        "indexLookbackDaysNumber",
        "prepaymentLockOutEndDate",
        "yieldMaintenanceEndDate",
        "prepaymentPremiumsEndDate",
        "maximumNegativeAmortizationAllowedPercentage",
        "maximumNegativeAmortizationAllowedAmount",
        "negativeAmortizationDeferredInterestCapAmount",
        "deferredInterestCumulativeAmount",
        "deferredInterestCollectedAmount",
        "property",
        "reportPeriodModificationIndicator",
        "reportPeriodBeginningScheduleLoanBalanceAmount",
        "totalScheduledPrincipalInterestDueAmount",
        "servicerTrusteeFeeRatePercentage",
        "unscheduledPrincipalCollectedAmount",
        "reportPeriodEndActualBalanceAmount",
        "reportPeriodEndScheduledLoanBalanceAmount",
        "hyperAmortizingDate",
        "servicingAdvanceMethodCode",
        "nonRecoverabilityIndicator",
        "totalPrincipalInterestAdvancedOutstandingAmount",
        "totalTaxesInsuranceAdvancesOutstandingAmount",
        "otherExpensesAdvancedOutstandingAmount",
        "paymentStatusLoanCode",
        "armIndexRatePercentage",
        "nextInterestRateChangeAdjustmentDate",
        "nextPaymentAdjustmentDate",
        "mostRecentSpecialServicerTransferDate",
        "mostRecentMasterServicerReturnDate",
        "realizedLossToTrustAmount",
        "liquidationPrepaymentCode",
        "liquidationPrepaymentDate",
        "prepaymentPremiumYieldMaintenanceReceivedAmount",
        "workoutStrategyCode",
        "lastModificationDate",
        "modificationCode",
        "postModificationPaymentAmount",
        "postModificationAmortizationPeriodAmount",
        "propertyName",
        "propertyAddress",
        "propertyCity",
        "propertyState",
        "propertyZip",
        "propertyCounty",
        "netRentableSquareFeetNumber",
        "netRentableSquareFeetSecuritizationNumber",
        "unitsBedsRoomsNumber",
        "unitsBedsRoomsSecuritizationNumber",
        "yearBuiltNumber",
        "yearLastRenovated",
        "valuationSecuritizationAmount",
        "valuationSourceSecuritizationCode",
        "valuationSecuritizationDate",
        "mostRecentValuationAmount",
        "mostRecentValuationDate",
        "mostRecentValuationSourceCode",
        "physicalOccupancySecuritizationPercentage",
        "mostRecentPhysicalOccupancyPercentage",
        "propertyStatusCode",
        "defeasanceOptionStartDate",
        "DefeasedStatusCode",
        "largestTenant",
        "squareFeetLargestTenantNumber",
        "leaseExpirationLargestTenantDate",
        "secondLargestTenant",
        "squareFeetSecondLargestTenantNumber",
        "leaseExpirationSecondLargestTenantDate",
        "thirdLargestTenant",
        "squareFeetThirdLargestTenantNumber",
        "leaseExpirationThirdLargestTenantDate",
        "financialsSecuritizationDate",
        "mostRecentFinancialsStartDate",
        "mostRecentFinancialsEndDate",
        "revenueSecuritizationAmount",
        "mostRecentRevenueAmount",
        "operatingExpensesSecuritizationAmount",
        "operatingExpensesAmount",
        "netOperatingIncomeSecuritizationAmount",
        "mostRecentNetOperatingIncomeAmount",
        "netCashFlowFlowSecuritizationAmount",
        "mostRecentNetCashFlowAmount",
        "netOperatingIncomeNetCashFlowSecuritizationCode",
        "netOperatingIncomeNetCashFlowCode",
        "mostRecentDebtServiceAmount",
        "debtServiceCoverageNetOperatingIncomeSecuritizationPercentage",
        "mostRecentDebtServiceCoverageNetOperatingIncomePercentage",
        "debtServiceCoverageNetCashFlowSecuritizationPercentage",
        "mostRecentDebtServiceCoverageNetCashFlowpercentage",
        "debtServiceCoverageSecuritizationCode",
        "mostRecentDebtServiceCoverageCode",
        "mostRecentAnnualLeaseRolloverReviewDate",
        "reportingPeriodEndingDate",
        "originalLoanTerm",
        "loanMaturityDate",
        "interestCalculationTypeCode",
        "originalFirstPaymentDate",
        "gracePeriodNumber",
        "subvented",
        "vehicleManufacturerName",
        "vehicleModelName",
        "vehicleNewUsedCode",
        "vehicleModelYear",
        "vehicleTypeCode",
        "vehicleValueAmount",
        "vehicleValueSourceCode",
        "obligorCreditScoreType",
        "obligorCreditScore",
        "coObligorIndicator",
        "paymentToIncomePercentage",
        "obligorGeographicLocation",
        "reportingPeriodModificationIndicator",
        "nextReportingPeriodPaymentAmountDue",
        "otherServicerFeeRetainedByServicer",
        "otherAssessedUncollectedServicerFeeAmount",
        "reportingPeriodActualEndBalanceAmount",
        "totalActualAmountPaid",
        "servicerAdvancedAmount",
        "currentDelinquencyStatus",
        "chargedoffPrincipalAmount",
        "recoveredAmount",
        "modificationTypeCode",
        "paymentExtendedNumber",
        "repossessedIndicator",
        "repossessedProceedsAmount",
        "reportingPeriodBeginDate",
        "acquisitionCost",
        "originalLeaseTermNumber",
        "scheduledTerminationDate",
        "gracePeriod",
        "baseResidualValue",
        "baseResidualSourceCode",
        "contractResidualValue",
        "lesseeCreditScoreType",
        "lesseeCreditScore",
        "lesseeIncomeVerificationLevelCode",
        "lesseeEmploymentVerificationCode",
        "coLesseePresentIndicator",
        "lesseeGeographicLocation",
        "remainingTermNumber",
        "reportingPeriodSecuritizationValueAmount",
        "securitizationDiscountRate",
        "otherLeaseLevelServicingFeesRetainedAmount",
        "reportingPeriodEndingActualBalanceAmount",
        "reportingPeriodEndActualSecuritizationAmount",
        "primaryLeaseServicerName",
        "DemandResolutionDate",
        "repurchaseOrReplacementReasonCode",
        "chargedOffAmount",
        "leaseExtended",
        "terminationIndicator",
        "excessFeeAmount",
        "liquidationProceedsAmount",
        "commentNumber", "commentColumn", "commentDescription",
        'previousAccessionNumber', 'itemNumber', 'fieldName', 'notes', 'sequenceNumber',
        "amendmentNo",
        "amendmentType",
        "confDeniedExpired",
        'additionalInformation',
        'fileNumber'
      ),
      nameActual = c(
        "priceExerciseConversion",
        "dateDeemedExecution",
        "codeOwnershipDirectIndirect",
        "idDocument",
        "isEquitySwapInvolved",
        "dateExercised",
        "dateExpiration",
        "descriptionFootnote",
        "isDirector",
        "isOfficer",
        "isOther",
        "idCIKIssuer",
        "nameIssuer",
        "idTickerIssuer",
        "isTenPercentOwner",
        "descriptionNatureOfOwnership",
        "isNoSecuritiesOwned",
        "isNotSubjectToSection16",
        "titleOfficer",
        "descriptionOtherText",
        "dateReport",
        "countSharesOwnedPostTransaction",
        "descriptionRemarks",
        "idCIKOwner",
        "cityOwenr",
        "nameOwner",
        "stateOwner",
        "descriptionStateOwner",
        "addressStreet1Owner",
        "addressStreet2Owner",
        "zipcodeOwner",
        "idSchema",
        "titleSecurity",
        "countSharesOwnedPostTransaction",
        "dateSignature",
        "nameSignature",
        "codeTransactionAcquiredDisposed",
        "codeTransaction",
        "dateTransaction",
        "idFormTransaction",
        "pricePerShareTransaction",
        "countSharesTransaction",
        "idCodeTimelinessTransaction",
        "amountTransaction",
        "countSharesUnderlying",
        "titleSecurityUnderlying",
        "descriptionResponse", "isBusinessCombinationTransaction",
        "idCIK", "isMoreThanOneYear", "nameEntityPrevius", "listNameEntityPreviousEDGAR",
        "nameEntity", "typeEntity", "descriptionEntityTypeOther", "idFederalExemptionsExclusions",
        "typeIndustryGroup", "typeInvestmentFund", "descriptionInvestmentFund",
        "hasNonAccreditedInvestors", "countInvestorsNonAccredited",
        "countInvestorsActive", "cityEntity", "stateEntity", "descriptionStateEntity",
        "addressStreet1Entity", "addressStreet2Entity", "zipcodeEntity", "phoneNumberEntity", "listIssuerPreviousName",
        "jurisdictionOfInc", "isOverFiveYearsOld", "hasYearOfInc", "isFormedWithinFiveYears",
        "isYetToBeFormed", "rangeAgregateNetAssetValue", "rangeRevenue",
        "amountInvestmentMinimum", "amountSoldTotal", "amountOfferingTotal",
        "amountRemaining", "nameFirst", "nameLast", "nameMiddle", "relationshipEntity",
        "descriptionRelationship", "amountDollars", "isEstimate", "idCRDBroker",
        "nameBroker", "isForeignSolicitation", "idCRDRecipient",
        "nameRecipient", "stateDescription", "state", "listStatesSolicitation",
        "isAuthorizedRepresentative", "nameSignatory", "titleSignatory",
        "idForm", "codeTestOrLive", "dateFirstSale", "isYetToOccur",
        "isAmendment", "descriptionOtherType", "isDebtType", "isEquityType",
        "isMineralPropertyType", "isOptionToAcquireType", "isOtherType",
        "isPooledInvestmentFundType", "isSecurityToBeAcquiredType", "isTenantInCommonType",
        'isNotSubjectToSection16', 'addressStreet1Owner', 'addressStreet2Owner',
        "isLiveTestFlag", "isConfirmingCopyFlag", "isReturnCopyFlag", "isOverrideInternetFlag",
        "idCCC", "dateReportCalendarOrQuarter", "nameFilingManager", "addressStreet1FilingManager",
        "addressStreet2FilingManager", "cityFilingManager", "stateFilingManager",
        'descriptionStateFilingManager',
        "zipcodeFilingManager", "typeReport", "idSEC",
        "codeProvideInfoForInstruction5", "nameEntity", "titleEntity", "phoneEntity", "signatureEntity",
        "countOtherIncludedManagers", "countTableEntries", "amountValueHoldings",
        "isConfidentialOmitted", "nameIssuer", "classSecurities", "idCUSIP", "valueSecurities", "typeInvestmentDiscretion",
        "descriptionOtherManager", "codePutCall", "countSharesPrincipal", "codeSharesPrincipal", "countSharesVotingSole",
        "countSharesVotingShared", "countSharesVotingNone",

        "idSEC", "isSinceLastFiling", "codeJurisdictionOrganization",
        "yearIncorporation", "idSIC", "idIRS", "countEmployeesFullTime",
        "countEmployeesPartTime", "phoneEntity", "nameConnection", "nameIndustry",
        "amountCashEquivalents", "amountInvestmentSecurities", "amountAccountsReceivable",
        "amountPropertyPlantEquipment", "amountAssetsTotal", "amountAccountsPayable", "amountLongTermDebt",
        "amountLiabilitiesTotal", "amountStockholderEquityTotal", "amountLiabilitiesAndEquityTotal",
        "amountRevenuesTotal", "amountCostAndExpensesOfRevenue", "amountDepreciationAndAmortization",
        "amountNetIncome", "pershareEarningsBasic", "pershareEarningsDiluted",
        "nameAuditor", "nameCommonEquityClass", "amountCommonEquityOutstanding",
        "idCUSIPCommonEquity", "isCommonEquityPublic", "namePreferredEquityClass",
        "amountPreferredEquityOutstanding", "idCusipPreferrdEquity", "isdPreferredEquityPublic",
        "nameDebtSecuritiesClass", "amountOutstandingDebtSecurities", "idCUSIPDebtSecurities",
        "isDebtSecuritiesPublic", "isCertifyIfTrue", "isCertifyIfNotDisqualified",
        "codeTier1Tier2Offering", "codeFinancialStatementAuditStatus", "codeSecuritiesOfferedTypes",
        "codeOfferDelayedContinuous", "codeOfferingYearFlag", "codeOfferingAfterQualifFlag",
        "codeOfferingBestEffortsFlag", "codeSolicitationProposedOfferingFlag",
        "codeResaleSecuritiesAffiliates", "countSecuritiesOffered", "countSecuritiesOutstanding",
        "persharePrice", "amountOfferingIssuer", "amountOfferingExistingShareholdersSelling",
        "amountOfferingSold12MonthQualifiedOffering", "amountOfferingSoldConcurrent",
        "amountOfferingTotal", "nameUnderwritr",
        "amountUnderwritersFees", "nameAuditor", "amountAuditorFees",
        "nameLegal", "amountLegalFees", "namePromoter",
        "amountPromotersFees", "idCRDBroker", "amountOfferringProceedsNet",
        "descriptionResponse", "isJurisdictionsOfSecOfferedSame", "locatonJuridicationSecuritiesOffering",
        "locationDealersJuridicationSecuritiesOffering", "nameSecuritiesIssuer",
        "titleSecuritiesOffered", "amountSecuritiesIssued", "amountSecuritiesPrincipalHolder",
        "amountSecuritiesIssuedTotal", "nameSecuritiesActExemption",
        "isBadActor", "nameSalesCommissionsServiceProvider",
        "amountSalesCommissionsFees", "isJurisdictionsSecuritiesOfferingNone",
        "isUnRegisteredNone",
        "nameBlueSkyServiceProvider", "amountBlueSkyFees",
        'isTier1Tier2Offering', 'idForm', 'idForm', 'amountOfferingConsiderationBasis',
        'nameFindersFeeProvider' , 'amountFindersFee',
        'amountLoans', 'amountPropertyAndEquipment', 'amountDeposits', 'amountInterestIncomeTotal',
        'amountInterestExpenseTotal', 'descriptionOtherSecuritiesOffered',
        'commentFiling',
        "numberAssetType",
        "numberAsset",
        "numberAssetGroup",
        "dateReportPeriodBeginning",
        "dateReportPeriodEnd",
        "nameIssuer",
        "dateOriginalIssuance",
        "amountOriginalSecurity",
        "numberOriginalSecurityTerm",
        "dateSecurityMaturity",
        "numberOriginalAmortizationTerm",
        "percentageOriginalInterestRate",
        "codeAccrualType",
        "codeInterestRateType",
        "numberOriginalInterestOnlyTerm",
        "dateFirstPayment",
        "hasUnderwriting",
        "nameSecurityTitle",
        "numberDenomination",
        "nameCurrency",
        "nameTrustee",
        "numberSecFile",
        "idCIK",
        "hasCallable",
        "codePaymentFrequency",
        "hasZeroCoupon",
        "hasAssetAdded",
        "hasAssetModified",
        "amountReportPeriodBeginningAssetBalance",
        "amountReportPeriodBeginningScheduledAssetBalance",
        "amountReportPeriodScheduledPayment",
        "percentageReportPeriodInterestRate",
        "amountTotalActualPaid",
        "percentageActualInterestCollection",
        "amountActualPrincipalCollected",
        "amountActualOtherCollection",
        "amountOtherPrincipalAdjustment",
        "amountOtherInterestAdjustment",
        "amountScheduledInterest",
        "amountScheduledPrincipal",
        "amountEndReportingPeriodActualBalance",
        "amountEndReportingPeriodScheduledBalance",
        "percentageServicingFee",
        "amountServicingFlatFee",
        "codeZeroBalance",
        "dateZeroBalanceEffective",
        "numberRemainingTermToMaturity",
        "numberCurrentDelinquentStatus",
        "numberPaymentPastDueDays",
        "numberPaymentPastDue",
        "amountNextReportPeriodPaymentDue",
        "dateNextDue",
        "namePrimaryLoanServicer",
        "dateMostRecentServicingTransferReceived",
        "hasAssetSubjectToDemand",
        "codeStatusAssetSubjectToDemand",
        "amountRepurchase",
        "dateDemandResolution",
        "nameRepurchaser",
        "codeRepurchaseReplacementReason",
        "dateReportPeriodBegin",
        "codeOriginalLoanPurpose",
        "nameOriginator",
        "amountOriginalLoan",
        "dateOriginalLoanMaturity",
        "codeOriginalInterestRateType",
        "codeOriginalLienPosition",
        "amountMostRecentJuniorLoanBalance",
        "dateMostRecentJuniorLoanBalance",
        "amountMostRecentSeniorLoan",
        "dateMostRecentSeniorLoanAmount",
        "codeLoanTypeMostSeniorLien",
        "numberMostSeniorLienHybridPeriod",
        "percentageMostSeniorLienNegativeAmortizationLimit",
        "dateMostSeniorLienOrigination",
        "hasPrepaymentPenalty",
        "hasNegativeAmortization",
        "hasModification",
        "numberModification",
        "hasMortgageInsuranceRequirement",
        "hasBalloon",
        "codeCoveredHighCost",
        "codeServicerHazardInsurance",
        "amountRefinanceCashOut",
        "amountTotalOriginationDiscount",
        "hasBroker",
        "codeChannel",
        "numberNationalMortgageLicenseSystemCompany",
        "numberBuyDown",
        "numberLoanDelinquencyAdvance",
        "codeOriginationARMIndex",
        "percentageArmMargin",
        "percentageFullyIndexedRate",
        "numberInitialFixedRatePeriodHybridARM",
        "percentageInitialInterestRateDecrease",
        "percentageInitialInterestRateIncrease",
        "numberIndexLookback",
        "numberSubsequentInterestRateReset",
        "percentageLifetimeRateCeiling",
        "percentageLifetimeRateFloor",
        "percentageSubsequentInterestRateDecrease",
        "percentageSubsequentInterestRateIncrease",
        "numberSubsequentPaymentReset",
        "codeArmRound",
        "percentageArmRound",
        "hasOptionArm",
        "codePaymentMethodAfterRecast",
        "amountInitialMinimumPayment",
        "hasConvertible",
        "hasHELOC",
        "numberHELOCDraw",
        "codePrepaymentPenaltyCalculation",
        "codePrepaymentPenaltyType",
        "numberPrepaymentPenaltyTotalTerm",
        "numberPrepaymentPenaltyHardTerm",
        "amountNegativeAmortizationLimit",
        "numberNegativeAmortizationInitialRecast",
        "numberNegativeAmortizationSubsequentRecast",
        "amountNegativeAmortizationBalance",
        "numberInitialFixedPayment",
        "percentageInitialPaymentCap",
        "percentageSubsequentPaymentCap",
        "numberInitialMinimumPaymentReset",
        "numberSubsequentMinimumPaymentReset",
        "amountMinimumPayment",
        "locationGeographical",
        "codeOccupancyStatus",
        "codeMostRecentOccupancyStatus",
        "codePropertyType",
        "amountMostRecentPropertyValue",
        "codeMostRecentPropertyValueType",
        "dateMostRecentPropertyValue",
        "codeMostRecentAVMModel",
        "numberMostRecentAVMConfidence",
        "percentageOriginalCLTV",
        "percentageOriginalLTV",
        "numberOriginalObligor",
        "numberOriginalObligorCreditScore",
        "typeOriginalObligorCreditScore",
        "numberMostRecentObligorCreditScore",
        "typeMostRecentObligorCreditScore",
        "dateMostRecentObligorCreditScore",
        "codeObligorIncomeVerificationLevel",
        "hasIRSForm4506T",
        "percentageOriginatorFrontEndDTI",
        "percentageOriginatorBackEndDTI",
        "codeObligorEmploymentVerification",
        "codeObligorEmploymentLength",
        "codeObligorAssetVerification",
        "amountOriginalPledgedAssets",
        "codeQualificationMethod",
        "nameMortgageInsuranceCompany",
        "percentageMortgageInsuranceCoverage",
        "namePoolInsuranceCompany",
        "percentagePoolInsuranceStopLoss",
        "codeMortgageInsuranceCoverageType",
        "periodModificationHasReporting",
        "dateNextPaymentDue",
        "codeAdvancingMethod",
        "codeServicingAdvanceMethodology",
        "dateStopPrincipalInterestAdvancing",
        "amountReportingPeriodBeginningLoanBalance",
        "amountReportingPeriodBeginningScheduledLoanBalance",
        "amountNextReportingPeriodPaymentDue",
        "percentageReportingPeriodInterestRate",
        "percentageNextInterestRate",
        "feeamountOtherAssessedUncollectedServicer",
        "amountOtherServicingFeeRetainedByServicer",
        "amountReportingPeriodEndActualBalance",
        "amountReportingPeriodEndScheduledBalance",
        "amountReportingPeriodScheduledPayment",
        "amountActualInterestCollected",
        "amountActualOtherCollected",
        "datePaidThrough",
        "dateInterestPaidThrough",
        "amountPaidFull",
        "amountServicerAdvancedPrincipal",
        "amountServicerAdvancedRepaidPrincipal",
        "amountServicerAdvancedCumulativePrincipal",
        "amountServicerAdvanceInterest",
        "amountServicerAdvanceRepaidInterest",
        "amountServicerAdvanceCumulativeInterest",
        "amountServicerAdvanceTaxesInsurance",
        "amountServicerAdvanceRepaidTaxesInsurance",
        "amountServicerAdvanceCumulativeTaxesInsurance",
        "amountServicerAdvanceCorporate",
        "amountServicerAdvanceRepaidCorporate",
        "amountServicerAdvanceCumulativeCorporate",
        "codeMostRecentTwelveMonthHistory",
        "percentageNextResetRate",
        "dateNextPaymentChange",
        "dateNextInterestRateChange",
        "amountNextResetPayment",
        "hasExercisedArmConversionOption",
        "namePrimaryServicer",
        "nameMasterServicer",
        "nameSpecialServicer",
        "nameSubServicer",
        "hasAssetSubjectDemand",
        "codeAssetSubjectDemandStatus",
        "codeRepurchaseReplacement",
        "amountChargeOffPrincipal",
        "amountChargeOffInterest",
        "codeLossMitigationType",
        "codeMostRecentLoanModificationEvent",
        "dateMostRecentLoanModificationEffective",
        "datePostModificationMaturity",
        "codePostModificationInterestRateType",
        "codePostModificationAmortizationType",
        "percentagePostModificationInterest",
        "datePostModificationFirstPayment",
        "amountPostModificationLoanBalance",
        "amountPostModificationPrincipalInterestPayment",
        "amountTotalCap",
        "modificationIncomeVerificationHasAt",
        "percentageModificationFrontEndDebtToIncome",
        "percentageModificationBackEndDebtToIncome",
        "amountTotalDeferred",
        "amountForgivenPrincipalCumulative",
        "amountForgivenPrincipalReportingPeriod",
        "amountForgivenInterestCumulative",
        "amountForgivenInterestReportingPeriod",
        "amountActualEndingBalanceTotalDebt",
        "amountScheduledEndingBalanceTotalDebt",
        "codePostModificationARM",
        "codePostModificationARMIndex",
        "percentagePostModificationMargin",
        "numberPostModificationInterestReset",
        "datePostModificationNextReset",
        "numberPostModificationIndexLookback",
        "codePostModificationARMRounding",
        "percentagePostModificationARMRounding",
        "paymentPostModificationInitialMinimum",
        "datePostModificationNextPaymentAdjustment",
        "frequencyPostModificationARMPaymentRecast",
        "percentagePostModificationLifetimeFloor",
        "percentagePostModificationLifetimeCeiling",
        "percentagePostModificationInitialInterestRateIncrease",
        "percentagePostModificationInitialInterestRateDecrease",
        "percentagePostModificationSubsequentInterestIncrease",
        "percentagePostModificationSubsequentInterestRateDecrease",
        "percentagePostModificationPaymentCap",
        "codePostModificationPaymentMethodAfterRecast",
        "numberPostModificationARMInterestRateTeaser",
        "numberPostModificationARMPaymentTeaser",
        "hasPostModificationARMNegativeAmortization",
        "percentagePostModificationARMNegativeAmortizationCap",
        "numberPostModificationInterestOnlyTerm",
        "datePostModificationInterestOnlyLastPayment",
        "amountPostModificationBalloon",
        "hasPostModificationInterestRateStep",
        "percentagePostModificationStepInterest",
        "datePostModificationStep",
        "amountPostModificationStepPrincipalInterestPayment",
        "numberPostModificationStep",
        "percentagePostModificationMaximumFutureStepAgreement",
        "datePostModificationMaximumStepAgreementRate",
        "amountNonInterestBearingDeferredPrincipalCumulative",
        "amountNonInterestBearingDeferredPrincipalReportingPeriod",
        "amountRecoveryDeferredPrincipalReportingPeriod",
        "amountNonInterestBearingDeferredPaidFull",
        "amountNonInterestBearingDeferredInterestFeeReportingPeriod",
        "amountNonInterestBearingDeferredInterestFeeCumulative",
        "amountRecoveryDeferredInterestFeeReportingPeriod",
        "dateMostRecentForbearancePlanOrTrialModificationStart",
        "dateMostRecentForbearancePlanOrTrialModificationScheduledEnd",
        "dateMostRecentTrialModificationViolated",
        "dateMostRecentRepaymentPlanStart",
        "dateMostRecentRepaymentPlanScheduledEnd",
        "dateMostRecentRepaymentPlanViolated",
        "amountShortSaleAcceptedOffer",
        "dateMostRecentLossMitigationExit",
        "codeMostRecentLossMitigationExit",
        "dateAttorneyReferral",
        "codeForeclosureDelayReason",
        "dateForeclosureExit",
        "codeForeclosureExitReason",
        "dateNoticeOfIntent",
        "amountMostRecentAcceptedREOOffer",
        "dateMostRecentAcceptedREOOffer",
        "amountGrossLiquidationProceeds",
        "amountNetSalesProceeds",
        "amountReportingPeriodLossPassedToIssuingEntity",
        "amountCumulativeTotalLossPassedToIssuingEntity",
        "amountSubsequentRecovery",
        "hasEviction",
        "dateReoExit",
        "codeReoExitReason",
        "amountUPBLiquidation",
        "amountServicingFeesClaimed",
        "amountServicerAdvanceReimbursedPrincipal",
        "amountServicerAdvanceReimbursedInterest",
        "amountServicerAdvanceReimbursedTaxesInsurance",
        "amountServicerAdvanceReimbursedCorporate",
        "amountREOManagementFees",
        "amountCashKeyDeed",
        "amountPerformanceIncentiveFees",
        "dateMortgageInsuranceClaimFiled",
        "amountMortgageInsuranceClaim",
        "dateMortgageInsuranceClaimPaid",
        "amountMortgageInsuranceClaimPaid",
        "dateMortgageInsuranceClaimDeniedRescinded",
        "dateMarketableTitleTransfer",
        "codeNonPayStatus",
        "codeReportingAction",
        "idGroup",
        "dateReportingPeriodBeginning",
        "dateReportingPeriodEnd",
        "dateOrigination",
        "numberOriginalTermLoan",
        "dateMaturity",
        "percentageInterestRateSecuritization",
        "codeInterestAccrualMethod",
        "dateFirstLoanPaymentDue",
        "codeLienPositionSecuritization",
        "codeLoanStructure",
        "codePaymentType",
        "amountPeriodicPrincipalAndInterestPaymentSecuritization",
        "amountScheduledPrincipalBalanceSecuritization",
        "securitizationNumberProperties",
        "propertiesNumber",
        "numberGraceDaysAllowed",
        "hasInterestOnly",
        "hasPrepaymentPremium",
        "hasModified",
        "codeArmIndex",
        "dateFirstRateAdjustment",
        "dateFirstPaymentAdjustment",
        "numberArmMargin",
        "percentageLifetimeRateCap",
        "percentagePeriodicRateIncreaseLimit",
        "percentagePeriodicRateDecreaseLimit",
        "amountPeriodicPaymentAdjustmentMaximum",
        "percentPeriodicPaymentAdjustmentMaximum",
        "codeRateResetFrequency",
        "codePaymentResetFrequency",
        "numberIndexLookbackDays",
        "datePrepaymentLockOutEnd",
        "dateYieldMaintenanceEnd",
        "datePrepaymentPremiumsEnd",
        "percentageMaximumNegativeAmortizationAllowed",
        "amountMaximumNegativeAmortizationAllowed",
        "amountNegativeAmortizationDeferredInterestCap",
        "amountDeferredInterestCumulative",
        "amountDeferredInterestCollected",
        "propertyProperty",
        "hasReportPeriodModification",
        "amountReportPeriodBeginningScheduleLoanBalance",
        "amountTotalScheduledPrincipalInterestDue",
        "percentageServicerTrusteeFeeRate",
        "amountUnscheduledPrincipalCollected",
        "amountReportPeriodEndActualBalance",
        "amountReportPeriodEndScheduledLoanBalance",
        "dateHyperAmortizing",
        "codeServicingAdvanceMethod",
        "hasNonRecoverability",
        "amountTotalPrincipalInterestAdvancedOutstanding",
        "amountTotalTaxesInsuranceAdvancesOutstanding",
        "amountOtherExpensesAdvancedOutstanding",
        "codePaymentStatusLoan",
        "percentageArmIndexRate",
        "dateNextInterestRateChangeAdjustment",
        "dateNextPaymentAdjustment",
        "dateMostRecentSpecialServicerTransfer",
        "dateMostRecentMasterServicerReturn",
        "amountRealizedLossToTrust",
        "codeLiquidationPrepayment",
        "dateLiquidationPrepayment",
        "amountPrepaymentPremiumYieldMaintenanceReceived",
        "codeWorkoutStrategy",
        "dateLastModification",
        "codeModification",
        "amountPostModificationPayment",
        "amountPostModificationAmortizationPeriod",
        "nameProperty",
        "addressProperty",
        "cityProperty",
        "stateProperty",
        "zipcodeProperty",
        "countyProperty",
        "numberNetRentableSquareFeet",
        "numberNetRentableSquareFeetSecuritization",
        "numberUnitsBedsRooms",
        "numberUnitsBedsRoomsSecuritization",
        "yearBuilt",
        "yearLastRenovated",
        "amountValuationSecuritization",
        "codeValuationSourceSecuritization",
        "dateValuationSecuritization",
        "amountMostRecentValuation",
        "dateMostRecentValuation",
        "codeMostRecentValuationSource",
        "percentagePhysicalOccupancySecuritization",
        "percentageMostRecentPhysicalOccupancy",
        "codePropertyStatus",
        "dateDefeasanceOptionStart",
        "codeDefeasedStatus",
        "tenantLargest",
        "numberSquareFeetLargestTenant",
        "dateLeaseExpirationLargestTenant",
        "tenantSecondLargest",
        "numberSquareFeetSecondLargestTenant",
        "dateLeaseExpirationSecondLargestTenant",
        "tenantThirdLargest",
        "numberSquareFeetThirdLargestTenant",
        "dateLeaseExpirationThirdLargestTenant",
        "dateFinancialsSecuritization",
        "dateMostRecentFinancialsStart",
        "dateMostRecentFinancialsEnd",
        "amountRevenueSecuritization",
        "amountMostRecentRevenue",
        "amountOperatingExpensesSecuritization",
        "amountOperatingExpenses",
        "amountNetOperatingIncomeSecuritization",
        "amountMostRecentNetOperatingIncome",
        "amountNetCashFlowFlowSecuritization",
        "amountMostRecentNetCashFlow",
        "codeNetOperatingIncomeNetCashFlowSecuritization",
        "codeNetOperatingIncomeNetCashFlow",
        "amountMostRecentDebtService",
        "percentageDebtServiceCoverageNetOperatingIncomeSecuritization",
        "percentageMostRecentDebtServiceCoverageNetOperatingIncome",
        "percentageDebtServiceCoverageNetCashFlowSecuritization",
        "percentageMostRecentDebtServiceCoverageNetCash",
        "codeDebtServiceCoverageSecuritization",
        "codeMostRecentDebtServiceCoverage",
        "dateMostRecentAnnualLeaseRolloverReview",
        "dateReportingPeriodEnding",
        "termOriginalLoan",
        "dateLoanMaturity",
        "codeInterestCalculationType",
        "dateOriginalFirstPayment",
        "numberGracePeriod",
        "subventedSubvented",
        "nameVehicleManufacturer",
        "nameVehicleModel",
        "codeVehicleNewUsed",
        "yearVehicleModel",
        "codeVehicleType",
        "amountVehicleValue",
        "codeVehicleValueSource",
        "typeObligorCreditScore",
        "scoreObligorCredit",
        "hasCoObligor",
        "percentagePaymentToIncome",
        "locationObligorGeographic",
        "hasReportingPeriodModification",
        "amountPaymentDueNextReportingPeriod",
        "servicerOtherServicerFeeRetainedBy",
        "amountOtherAssessedUncollectedServicerFee",
        "amountReportingPeriodActualEndBalance",
        "amountPaidTotalActual",
        "amountServicerAdvanced",
        "isDelinquent",
        "amountChargedoffPrincipal",
        "amountRecovered",
        "codeModificationType",
        "numberPaymentExtended",
        "hasRepossessed",
        "amountRepossessedProceeds",
        "dateReportingPeriodBegin",
        "costAcquisition",
        "numberOriginalLeaseTerm",
        "dateScheduledTermination",
        "periodGrace",
        "valueBaseResidual",
        "codeBaseResidualSource",
        "valueContractResidual",
        "typeLesseeCreditScore",
        "scoreLesseeCredit",
        "codeLesseeIncomeVerificationLevel",
        "codeLesseeEmploymentVerification",
        "hasCoLesseePresent",
        "locationLesseeGeographic",
        "numberRemainingTerm",
        "amountReportingPeriodSecuritizationValue",
        "rateSecuritizationDiscount",
        "amountOtherLeaseLevelServicingFeesRetained",
        "amountReportingPeriodEndingActualBalance",
        "amountReportingPeriodEndActualSecuritization",
        "namePrimaryLeaseServicer",
        "dateDemandResolution",
        "codeRepurchaseOrReplacementReason",
        "amountChargedOff",
        "extendedLease",
        "hasTermination",
        "amountExcessFee",
        "amountLiquidationProceeds",
        "detailNumberComment", "columnComment", "descriptionComment",
        'idAccessionPrevious',
        'numberItem', 'nameField', 'descriptionNotes', 'idSequence',
        "numberAmendment",
        "typeAmendmentType",
        "confDeniedExpired",
        'descriptionInformationAdditional',
        'numberFile'
      )
    )}

# Lookup table pairing each filer-owner type id with its display name.
#
# Returns a two-column tibble (`idTypeFilerOwner`, `typeFilerOwner`); both
# columns are upper-cased before being returned.
.filer_type_df <-
  function() {
    tibble(
      idTypeFilerOwner = c(
        'insider',
        'private',
        'broker_dealer',
        'transfer_agent',
        'ia',
        'msd',
        'bank',
        'inv_co'
      ),
      # Labels must stay in the same order as the ids above:
      # 'msd' is the municipal securities dealer and 'bank' is the bank.
      typeFilerOwner = c(
        'Insider',
        'Private Placement',
        'Broker Dealer',
        'Transfer Agent',
        'Investment Advisor',
        'Municipal Securities Dealer',
        'Bank',
        'Investment Company'
      )
    ) %>%
      mutate_all(str_to_upper)
  }

#' Form-D dictionary
#'
#' This function returns the searchable
#' industries for parsed SEC Form-D
#' filings, together with the parent
#' industry each one rolls up to.
#'
#' @return a \code{tibble} with one row per industry and the columns
#'   \code{idIndustry}, \code{nameIndustry}, \code{codeIndustryParent}
#'   and \code{nameIndustryParent}
#' @export
#' @import dplyr
#' @examples
#' dictionary_form_d_categories()
dictionary_form_d_categories <-
  function() {
    # The three character vectors below are positionally aligned:
    # element i of each describes industry i.
    dplyr::tibble(
      idIndustry = 1:35,
      nameIndustry = c(
        "AGRICULTURE",
        "AIRLINES AND AIRPORTS",
        "BIOTECHNOLOGY",
        "BUSINESS SERVICES",
        "COAL MINING",
        "COMMERCIAL REAL ESTATE",
        "COMMERCIAL BANKING",
        "COMPUTERS",
        "CONSTRUCTION",
        "ELECTRIC UTILITIES",
        "ENERGY CONSERVATION",
        "ENVIRONMENTAL SERVICES",
        "HEALTH INSURANCE",
        "HOSPITALS AND PHYSICIANS",
        "INSURANCE",
        "INVESTING",
        "INVESTMENT BANKING",
        "LODGING AND CONVENTIONS",
        "MANUFACTURING",
        "OIL AND GAS",
        "OTHER",
        "OTHER BANKING AND FINANCIAL SERVICES",
        "OTHER ENERGY",
        "OTHER HEALTH CARE",
        "OTHER REAL ESTATE",
        "OTHER TECHNOLOGY",
        "OTHER TRAVEL",
        "PHARMACEUTICALS",
        "POOLED INVESTMENT FUND",
        "REITS AND FINANCE",
        "RESIDENTIAL REAL ESTATE",
        "RESTAURANTS",
        "RETAIL",
        "TELECOMMUNICATIONS",
        "TRAVEL AND TOURISM"
      ),
      codeIndustryParent = c(
        "OTHER",
        "TRAVEL",
        "HEALTH",
        "OTHER",
        "ENERGY",
        "REAL",
        "FINANCE",
        "TECH",
        "REAL",
        "ENERGY",
        "ENERGY",
        "ENERGY",
        "HEALTH",
        "HEALTH",
        "FINANCE",
        "FINANCE",
        "FINANCE",
        "TRAVEL",
        "OTHER",
        "ENERGY",
        "OTHER",
        "FINANCE",
        "ENERGY",
        "HEALTH",
        "REAL",
        "TECH",
        "TRAVEL",
        "HEALTH",
        "FINANCE",
        "REAL",
        "REAL",
        "OTHER",
        "OTHER",
        "TECH",
        "TRAVEL"
      ),
      nameIndustryParent = c(
        "OTHER",
        "TRAVEL AND LEISURE",
        "HEALTHCARE",
        "OTHER",
        "ENERGY",
        "REAL ESTATE",
        "FINANCIAL",
        "TECHNOLOGY",
        "REAL ESTATE",
        "ENERGY",
        "ENERGY",
        "ENERGY",
        "HEALTHCARE",
        "HEALTHCARE",
        "FINANCIAL",
        "FINANCIAL",
        "FINANCIAL",
        "TRAVEL AND LEISURE",
        "OTHER",
        "ENERGY",
        "OTHER",
        "FINANCIAL",
        "ENERGY",
        "HEALTHCARE",
        "REAL ESTATE",
        "TECHNOLOGY",
        "TRAVEL AND LEISURE",
        "HEALTHCARE",
        "FINANCIAL",
        "REAL ESTATE",
        "REAL ESTATE",
        "OTHER",
        "OTHER",
        "TECHNOLOGY",
        "TRAVEL AND LEISURE"
      )
    )
  }

# Lookup table of insider transaction codes.
#
# Returns a tibble with one row per code: the code itself
# (`idInsiderTransaction`), its description (`nameInsiderTransaction`) and a
# type flag (`idTypeInsiderTransaction`) that is "A", "D" or NA.
# NOTE(review): presumably "A" = acquisition and "D" = disposition - confirm.
.insider_code_df <-
  function() {
    # The three vectors are positionally aligned, one entry per code.
    transaction_codes <- c(
      "A", "C", "D", "F", "G",
      "H", "I", "J", "K", "L",
      "M", "NONE", "O", "P", "S",
      "U", "V", "W", "X", "Z"
    )

    transaction_names <- c(
      "AWARD", "CONVEYANCE", "DISPOSITION TO ISSUER",
      "PAYMENT WITH SECURITIES", "GIFT",
      "EXPIRATION OF LONG DERIVATIVE POSITION",
      "DISCRETIONARY TRANSACTION", "OTHER",
      "EQUITY SWAP OR SIMILAR", "SMALL ACQUISITIONS",
      "EXEMPT", NA, "OTM EXERCISE", "PURCHASE", "SALE",
      "MERGER AND ACQUISITION", "REPORTED EARLY",
      "WILL OR LAWS OF DESCENT", "ITM OR ATM EXERCISE",
      "DEPOSIT INTO/WITHDRAWAL FROM VOTING TRUST"
    )

    transaction_types <- c(
      "A", "D", "D", "D", "D",
      NA, NA, NA, NA, "A",
      "A", NA, "A", "A", "D",
      NA, NA, "D", "A", "D"
    )

    tibble(
      idInsiderTransaction = transaction_codes,
      nameInsiderTransaction = transaction_names,
      idTypeInsiderTransaction = transaction_types
    )
  }

#' SEC filing code dictionary
#'
#' Returns a dictionary that maps each numbered
#' SEC filing item code (for example \code{"1.01"})
#' to its upper-cased title.
#'
#' @return a \code{tibble} with the columns \code{idFormType}
#'   and \code{nameFormType}
#' @export
#' @import dplyr stringr
#' @family SEC
#' @family dictionary
#'
#' @examples
#' dictionary_sec_filing_codes()
dictionary_sec_filing_codes <-
  function() {
    # Item codes and titles are positionally aligned.
    item_codes <- c(
      "1.01", "1.02", "1.03", "1.04",
      "2.01", "2.02", "2.03", "2.04", "2.05", "2.06",
      "3.01", "3.02", "3.03",
      "4.01", "4.02",
      "5.01", "5.02", "5.03", "5.04", "5.05", "5.06", "5.07", "5.08",
      "6.01", "6.02", "6.03", "6.04", "6.05",
      "7.01",
      "8.01",
      "9.01"
    )

    item_titles <- c(
      "Entry into a Material Definitive Agreement",
      "Termination of a Material Definitive Agreement",
      "Bankruptcy or Receivership",
      "Mine Safety Reporting of Shutdowns and Patterns of Violations",
      "Completion of Acquisition or Disposition of Assets",
      "Results of Operations and Financial Condition",
      "Creation of a Direct Financial Obligation or an Obligation under an Off-Balance Sheet Arrangement of a Registrant",
      "Triggering Events That Accelerate or Increase a Direct Financial Obligation or an Obligation under an Off-Balance Sheet Arrangement",
      "Costs Associated with Exit or Disposal Activities",
      "Material Impairments",
      "Notice of Delisting or Failure to Satisfy a Continued Listing Rule or Standard; Transfer of Listing",
      "Unregistered Sales of Equity Securities",
      "Material Modification to Rights of Security Holders",
      "Changes in Registrant's Certifying Accountant",
      "Non-Reliance on Previously Issued Financial Statements or a Related Audit Report or Completed Interim Review",
      "Changes in Control of Registrant",
      "Departure of Directors or Certain Officers; Election of Directors; Appointment of Certain Officers; Compensatory Arrangements of Certain Officers",
      "Amendments to Articles of Incorporation or Bylaws; Change in Fiscal Year",
      "Temporary Suspension of Trading Under Registrant's Employee Benefit Plans",
      "Amendments to the Registrant's Code of Ethics, or Waiver of a Provision of the Code of Ethics",
      "Change in Shell Company Status",
      "Submission of Matters to a Vote of Security Holders",
      "Shareholder Director Nominations",
      "ABS Informational and Computational Material",
      "Change of Servicer or Trustee",
      "Change in Credit Enhancement or Other External Support",
      "Failure to Make a Required Distribution",
      "Securities Act Updating Disclosure",
      "Regulation FD Disclosure",
      "Other Events",
      "Financial Statements and Exhibits"
    )

    tibble(
      idFormType = item_codes,
      nameFormType = stringr::str_to_upper(item_titles)
    )
  }

#' SEC form codes
#'
#' Returns a dictionary that maps each short
#' SEC form code to its upper-cased form name.
#'
#' @return a \code{tibble} with the columns \code{idForm}
#'   and \code{nameForm}
#' @export
#' @family SEC
#' @family dictionary
#'
#' @examples
#' dictionary_sec_form_codes()
dictionary_sec_form_codes <-
  function() {
    # Codes and names are positionally aligned.
    form_codes <- c(
      "R", "A", "Q", "CR",
      "REG", "REGX", "O", "P",
      "X", "W", "SEC", "PROXY",
      "CT", "IS", "CO", "T"
    )

    form_names <- c(
      "Other Report", "Annual Report", "Quarterly Report", "Current Report",
      "Registration", "Private Offering", "Ownership", "Prospectus",
      "Exemption", "Withdrawal", "SEC Correspondence", "Proxy Statement",
      "Confidential Treatment", "Initial Statement", "Change in Ownership",
      "Trades"
    )

    tibble(
      idForm = form_codes,
      nameForm = stringr::str_to_upper(form_names)
    )
  }

# Lookup table pairing each short company-type id with its display name.
#
# Returns a two-column tibble (`idCompanyType`, `nameCompanyType`); values
# are returned exactly as written here (no case conversion).
.company_type_df <-
  function() {
    # Ids and names are positionally aligned.
    type_ids <- c(
      "ic", "i", "ia", "bd", "m", "t", "b",
      "c", "p", "etf", "mmf", "mf", "uit", "cef"
    )

    type_names <- c(
      "Investment Company", "Insider", "Investment Adviser",
      "Broker-dealer", "Municipal Securities Dealer", "Transfer Agent",
      "Bank", "Company", "Private Issuer", "ETF",
      "Money Market Fund", "Mutual Fund", "UIT", "Closed-end Fund"
    )

    tibble(idCompanyType = type_ids, nameCompanyType = type_names)
  }

#' SEC Rule dictionary
#'
#' This function returns a
#' dictionary of SEC rules, with both
#' the rule id and the rule name upper-cased.
#'
#' @return a \code{tibble} with the columns \code{idRule}
#'   and \code{nameRule}
#' @export
#' @import dplyr stringr
#'
#' @examples
#' dictionary_sec_rules()
dictionary_sec_rules <-
  function() {
    # Shared label used by every "3C*" / "3" id below.
    # Ids and names are positionally aligned.
    rule_ids <- c(
      "06", "3C", "3C.7", "3C.1", "06b",
      "04", "46", "04.1", "04.2", "04.3",
      "05", "3C.6", "3C.5", "06c", "4a5",
      "3C.11", "3C.2", "3C.3", "3C.9", "3C.10",
      "3C.4", "3C.12", "3C.", "3C.14", "3"
    )

    rule_names <- c(
      "Rule 506",
      "Investment Company Act Section 3c",
      "Investment Company Act Section 3c",
      "Investment Company Act Section 3c",
      "Rule 506b",
      "Rule 504",
      "Rule 506c",
      "Rule 504b(1)(i)",
      "Rule 504b(1)(ii)",
      "Rule 504b(1)(iii)",
      "Rule 505",
      "Investment Company Act Section 3c",
      "Investment Company Act Section 3c",
      "Rule 506c",
      "Securities Act Section 4(a)(5)",
      "Investment Company Act Section 3c",
      "Investment Company Act Section 3c",
      "Investment Company Act Section 3c",
      "Investment Company Act Section 3c",
      "Investment Company Act Section 3c",
      "Investment Company Act Section 3c",
      "Investment Company Act Section 3c",
      "Investment Company Act Section 3c",
      "Investment Company Act Section 3c",
      "Investment Company Act Section 3c"
    )

    rules_tbl <- tibble(idRule = rule_ids, nameRule = rule_names)

    # Upper-case every column (ids such as "06b" become "06B").
    mutate_all(rules_tbl, str_to_upper)
  }




# Company Tickers ---------------------------------------------------------



#' SEC listed public companies
#'
#' Downloads the SEC company ticker file and returns one row per
#' distinct CIK / ticker / company-name combination, with company
#' names upper-cased.
#'
#' @param include_ticker_information if \code{TRUE} looks up additional
#'   ticker information with \code{sec_tickers_info()} and joins it to the
#'   result by \code{idTicker}. If that lookup fails, a warning is raised
#'   and the base ticker table is returned without the extra columns.
#' @param join_sic passed on to \code{sec_tickers_info()}; only used when
#'   \code{include_ticker_information} is \code{TRUE}
#' @param snake_names passed on to \code{munge_tbl()} for the final result
#' @param return_message passed on to \code{sec_tickers_info()}; only used
#'   when \code{include_ticker_information} is \code{TRUE}
#'
#' @return a \code{tibble} built from the columns \code{idCIK},
#'   \code{idTicker} and \code{nameCompany} (plus any joined ticker
#'   information), after processing by \code{munge_tbl()}
#' @export
#' @import jsonlite dplyr purrr stringr dplyr
#' @family SEC EDGAR
#' @examples
#' edgar_tickers()
edgar_tickers <-
  function(include_ticker_information = FALSE,
           join_sic = TRUE,
           snake_names = FALSE,

           return_message = TRUE) {
    json_data <-
      "https://www.sec.gov/data/company_tickers.json" %>%
      jsonlite::fromJSON(simplifyDataFrame = TRUE, flatten = FALSE)

    # Each top-level JSON entry is one company record; stack them row-wise.
    data <-
      seq_along(json_data) %>%
      map_dfr(function(x) {
        json_data[[x]] %>% flatten_dfr()
      }) %>%
      setNames(c('idCIK',
                 'idTicker',
                 "nameCompany")) %>%
      distinct() %>%
      mutate(nameCompany = nameCompany %>% str_to_upper())

    if (include_ticker_information) {
      "\n\nAcquiring ticker information\n\n" %>% cat(fill = TRUE)

      # Use the fail-safe wrapper so a failed lookup yields an empty tibble
      # instead of aborting the whole call.
      sec_tickers_info_safe <-
        possibly(sec_tickers_info, tibble())

      df_tickers <-
        sec_tickers_info_safe(
          tickers = data$idTicker,
          return_message = return_message,
          join_sic = join_sic,
          unformat = TRUE,
          # Kept FALSE so the join key below is still called idTicker.
          snake_names = FALSE,
          convert_case = TRUE,
          include_address = TRUE
        )

      has_join_columns <-
        all(c("idTicker", "nameCompany", "idCIK") %in% names(df_tickers))

      if (has_join_columns) {
        # Rename the overlapping columns so they do not clash in the join.
        df_tickers <-
          df_tickers %>%
          rename(nameCompanyTicker = nameCompany,
                 idCIKTicker = idCIK)

        data <-
          data %>%
          left_join(
            df_tickers, by = "idTicker"
          )
      } else {
        warning(
          "Ticker information could not be acquired; returning tickers only",
          call. = FALSE
        )
      }
    }

    data <- data %>% munge_tbl(snake_names = snake_names)

    data
  }


# EDGAR Counts ------------------------------------------------------------

# Count the EDGAR full-text search hits for a single CIK.
#
# Builds a srch-edgar query for the padded CIK covering 1994 through the
# current year, reads the result page and parses the number found in the
# 'p+ b' node(s).
#
# Returns a tibble with idCIK, countFilings, countPages (filings / 100,
# rounded up) and isMultiSearch (TRUE when more than 20 pages). When the
# page has no matching node, only idCIK is returned.
.cik_filing_count <-
  function(cik = 886982,
           return_message = TRUE) {
    current_year <- as.numeric(lubridate::year(Sys.Date()))

    search_url <-
      paste0(
        "https://www.sec.gov/cgi-bin/srch-edgar?text=CIK%3D",
        pad_cik(cik),
        '&first=1994&last=',
        current_year
      )

    result_page <- read_html(search_url)

    # Text of the node(s) that carry the hit count.
    count_text <-
      html_text(html_nodes(result_page, css = 'p+ b'))

    if (length(count_text) == 0) {
      return(tibble(idCIK = cik))
    }

    filings <- readr::parse_number(as.character(count_text))
    pages <- ceiling(filings / 100)

    df <-
      mutate(
        tibble(
          idCIK = cik,
          countFilings = filings,
          countPages = pages
        ),
        isMultiSearch = pages > 20
      )

    if (return_message) {
      cat(
        paste0(
          "CIK: ",
          cik,
          " has ",
          formattable::comma(filings, digits = 0),
          ' Filings'
        ),
        fill = T
      )
    }

    df
  }

#' CIK Filing Counts
#'
#' Looks up the EDGAR filing count for each supplied CIK and
#' row-binds the results. A CIK whose lookup fails contributes
#' no row.
#'
#' @param cik CIK codes
#' @param return_message if \code{TRUE} prints a message for each CIK
#'
#' @return a \code{tibble} with the per-CIK results bound by row
#' @export
#'
#' @examples
cik_filing_counts <-
  function(cik, return_message = T) {
    # A failing lookup yields an empty tibble instead of an error.
    count_one_cik <-
      possibly(.cik_filing_count, tibble())

    future_map_dfr(cik, function(one_cik) {
      count_one_cik(cik = one_cik, return_message = return_message)
    })
  }

# Count the EDGAR full-text search hits for a single SIC code.
#
# Builds a srch-edgar query for the padded SIC code covering 1994 through
# the current year, reads the result page and parses the number found in
# the 'p+ b' node(s).
#
# Returns a tibble with idSIC, countFilings, countPages (filings / 100,
# rounded up) and isMultiSearch (TRUE when more than 20 pages). When the
# page has no matching node, only idSIC is returned.
.sic_filing_count <-
  function(sic = 800,
           return_message = TRUE) {
    code_sic <-
      sic %>%
      pad_sic()

    url <-
      list("https://www.sec.gov/cgi-bin/srch-edgar?text=ASSIGNED-SIC%3D",
           code_sic, '&first=1994&last=',
           Sys.Date() %>% lubridate::year() %>% as.numeric()
      ) %>%
      purrr::reduce(paste0)

    page <-
      url %>%
      read_html()

    # Text of the node(s) that carry the hit count; read once and reused.
    count_text <-
      page %>%
      html_nodes(css = 'p+ b') %>%
      html_text()

    if (length(count_text) == 0) {
      # No result node: return the SIC code alone. This branch previously
      # referenced `cik`, which does not exist in this function.
      return(tibble(idSIC = sic))
    }

    filings <-
      count_text %>%
      as.character() %>%
      readr::parse_number()

    pages <-
      ceiling(filings / 100)

    df <-
      tibble(idSIC = sic,
             countFilings = filings,
             countPages = pages) %>%
      mutate(isMultiSearch = pages > 20)

    if (return_message) {
      list("SIC: ", sic, " has ", filings %>% formattable::comma(digits = 0), ' Filings') %>%
        purrr::reduce(paste0) %>%
        cat(fill = TRUE)
    }
    df
  }



#' SIC Counts
#'
#' Looks up the EDGAR filing count for each supplied SIC code and
#' row-binds the results. A SIC code whose lookup fails contributes
#' no row.
#'
#' @param sic vector of SIC codes
#' @param join_sic if \code{TRUE} joins the SIC dictionary from
#'   \code{dictionary_sic_codes()} by \code{idSIC}
#' @param snake_names passed on to \code{munge_tbl()}
#' @param use_all_sic_codes if \code{TRUE} ignores \code{sic} and uses every
#'   code in \code{dictionary_sic_codes()}
#' @param return_message if \code{TRUE} prints a message for each SIC code
#' @param unformat passed on to \code{munge_tbl()}
#'
#'
#' @return a \code{tibble} of filing counts per SIC code
#' @export
#' @import dplyr purrr curl formattable tidyr stringr lubridate rvest httr xml2 jsonlite readr stringi
#' @examples
sic_filing_count <-
  function(sic = NULL, join_sic = TRUE, snake_names = FALSE,
           use_all_sic_codes = FALSE,
           return_message = TRUE,
           unformat = FALSE) {
    if (use_all_sic_codes) {
      sic <-
        dictionary_sic_codes() %>%
        pull(idSIC)
    }

    # Fail early with a clear message; the previous bare string was
    # evaluated and discarded, so empty input fell through silently.
    if (length(sic) == 0) {
      stop("Enter SIC Codes", call. = FALSE)
    }

    # A failing lookup yields an empty tibble instead of an error.
    .sic_filing_count_safe <- possibly(.sic_filing_count, tibble())

    data <-
      sic %>%
      future_map_dfr(function(x) {
        .sic_filing_count_safe(sic = x, return_message = return_message)
      })

    if (join_sic) {
      data <-
        data %>%
        left_join(dictionary_sic_codes(), by = "idSIC")
    }

    # Honour the caller's `unformat` choice (it used to be hard-coded).
    data <-
      data %>%
      munge_tbl(snake_names = snake_names, unformat = unformat)

    data
  }


# SEC Dictionaries --------------------------------------------------------

# Coerce the columns of a parsed form table to types chosen by the prefix of
# each column name, then drop columns that contain only NA.
#
# The steps run in order on the piped table, but the column names for each
# `mutate_at()` are looked up on the original `data` argument.
.resolve_form_columns <-
  function(data) {
    data %>%
      # Character columns: treat the placeholders '_' and "NULL" as missing.
      mutate_if(is.character,
                funs(ifelse(. %in% c('_', "NULL"), NA, .))) %>%
      # Text-like columns (by name prefix) are upper-cased.
      mutate_at(data %>% select(
        dplyr::matches(
          "^name|^description|^idDay|^type|^title|^description|^code|^address|^city|^state|^relationship"
        )
      ) %>% names(),
      funs(. %>% str_to_upper())) %>%
      # Numeric-like columns (by name prefix) are converted to character and
      # parsed with readr::parse_number().
      mutate_at(data %>% select(
        dplyr::matches("^price|^count|^amount|^value|^idCIK|^yearIncorporation|^idSIC|^pershare|^number|^percent|^term|^pct|^score|^year")
      ) %>% names(),
      funs(. %>% as.character() %>% readr::parse_number())) %>%
      # Flag columns (^is / ^has): 'true'/'false' go through as.logical();
      # any other value goes through as.numeric() first.
      mutate_at(data %>% select(dplyr::matches("^is|^has")) %>% names(),
                funs(
                  ifelse(
                    . %in% c('true', 'false'),
                    . %>% as.logical(),
                    . %>% as.numeric() %>% as.logical()
                  )
                )) %>%
      # Date columns are parsed as year-month-day.
      mutate_at(data %>% select(dplyr::matches("^date")) %>% names(),
                funs(. %>% lubridate::ymd())) %>%
      # These two column families are multiplied by 1000.
      # NOTE(review): presumably the source reports them in thousands - confirm.
      mutate_at(data %>% select(dplyr::matches("^amountValueHoldings|^valueSecurities")) %>% names(),
                funs(. * 1000)) %>%
      # Intended to silence the conversion warnings/messages raised above.
      suppressWarnings() %>%
      suppressMessages() %>%
      # Keep only columns whose share of NA values is below 100%.
      select(which(colMeans(is.na(.)) < 1))
  }


# SIC codes
# https://www.sec.gov/info/edgar/siccodes.htm

#' SIC Code dictionary
#'
#' Reads the SIC code list published on the SEC website and returns
#' the first table on that page. The function is memoised, so the page
#' is only fetched on the first call.
#'
#' @return a \code{tibble} built from the columns \code{idSIC},
#'   \code{nameOfficeAD} and \code{nameIndustry}, after processing by
#'   \code{munge_tbl()}
#' @export
#' @import rvest stringr dplyr purrr tidyr xml2
#' @family SEC
#' @family dictionary
#'
#'
#' @examples
#' dictionary_sic_codes()

dictionary_sic_codes <-
  memoise::memoise(function() {
    page_tables <-
      html_table(
        read_html("https://www.sec.gov/info/edgar/siccodes.htm"),
        fill = T
      )

    # Only the first table on the page is used.
    sic_tbl <- as_tibble(first(page_tables))
    names(sic_tbl) <- c("idSIC", "nameOfficeAD", "nameIndustry")

    munge_tbl(sic_tbl, convert_case = T)
  })

# Form Descriptions

#' SEC form dictionary
#'
#' Scrapes the SEC forms listing page and returns one row per form,
#' sorted with the most recently updated form first.
#'
#' @return a \code{tibble} with the columns \code{idForm}, \code{nameForm},
#'   \code{urlFormDescription}, \code{dateFormUpdate}, \code{idSECNumber}
#'   and \code{referenceForm}
#' @export
#' @import dplyr purrr curl formattable tidyr stringr lubridate rvest httr xml2 jsonlite readr stringi
#' @examples
dictionary_sec_forms <-
  function() {
    page <- read_html("https://www.sec.gov/forms")

    # Text of every node matching a CSS selector on the page.
    node_text <- function(selector) {
      html_text(html_nodes(page, selector))
    }

    # Form numbers, upper-cased with the 'NUMBER:' label removed.
    forms <-
      node_text('.release-number-content') %>%
      str_trim() %>%
      str_to_upper() %>%
      str_replace_all('NUMBER:', '')

    # The title links supply both the form name and the description URL.
    title_links <-
      html_nodes(page, '.views-field-field-display-title a')

    form_names <-
      html_text(title_links) %>%
      str_to_upper() %>%
      str_trim() %>%
      str_replace_all('\r|\n|\u0092|\u0097', '') %>%
      str_replace_all('(PDF)', '') %>%
      str_replace_all('\\(', '') %>%
      str_replace_all('\\)', '') %>%
      str_trim()

    url_description_form <-
      paste0('https://www.sec.gov', html_attr(title_links, 'href'))

    # The page text is prefixed with "01-" before being parsed as
    # day-month-year.
    date_updated <-
      lubridate::dmy(paste0("01-", node_text('.datetime')))

    sec_ids <-
      node_text('.list-page-detail-content') %>%
      str_trim() %>%
      str_replace_all('SEC Number:', '') %>%
      str_trim()

    # Blank SEC numbers become missing values.
    sec_ids[sec_ids == ''] <-
      NA

    # NOTE(review): the pattern below is a regular expression, so `(S)` is a
    # group rather than literal parentheses, and `\\T` is an unusual escape.
    # The code afterwards takes the text after the first ':' - confirm the
    # intended behaviour before changing the pattern.
    topic_text <-
      node_text('td.views-field-term-node-tid') %>%
      str_trim() %>%
      str_to_upper() %>%
      str_replace_all('\\TOPIC(S):','')

    reference <-
      topic_text %>%
      str_split('\\:') %>%
      future_map(function(topic_parts) {
        pieces <-
          purrr::flatten_chr(str_split(topic_parts, '\\:'))
        pieces[[2]]
      }) %>%
      purrr::flatten_chr()

    form_tbl <-
      tibble(
        idForm = forms,
        nameForm = form_names,
        urlFormDescription = url_description_form,
        dateFormUpdate = date_updated,
        idSECNumber = sec_ids,
        referenceForm = reference
      )

    arrange(form_tbl, desc(dateFormUpdate))
  }


# General -----------------------------------------------------------------

#' Parse an EDGAR data frame for underlying tables
#'
#' Builds a collection of named tables from a data frame of EDGAR filings.
#' A `"Summary"` table is always attempted; when at least one of the specific
#' parsing flags is `TRUE` the filing index of every
#' `urlSECFilingDirectory` is parsed and the requested form-specific tables
#' are added. Tables that come back with zero columns are dropped.
#'
#' @param all_data Data frame of EDGAR filings. When filings are parsed it
#'   must contain `dateFiling`, `urlSECFilingDirectory` and `idCIK`; the
#'   optional `termSearch` / `countFilings` columns are split off into the
#'   `"Summary"` table.
#' @param table_name_initial Name given to the table holding `all_data`
#'   joined with its parsed filing index.
#' @param parse_all_filings Currently ignored: the value is recomputed as
#'   "any of the specific `parse_*` flags below is `TRUE`".
#' @param parse_complete_text_filings If `TRUE`, add a `"Text Filings"` table.
#' @param parse_form_d If `TRUE`, add a `"FormDs"` table.
#' @param parse_13F If `TRUE`, add a `"13Fs"` table.
#' @param parse_small_offerings If `TRUE`, add a `"Small Offerings"` table.
#' @param parse_form_3_4s If `TRUE`, add a `"Form 3 and 4"` table.
#' @param parse_asset_files If `TRUE`, add an `"Asset Data"` table.
#' @param parse_xbrl If `TRUE`, add an `"XBRL"` table.
#' @param assign_to_environment If `TRUE`, each table is also assigned into
#'   the global environment as `data<TableName>EDGAR`.
#' @param nest_data Currently unused by this function.
#' @param return_message Currently unused by this function.
#'
#' @return A tibble with one row per table: `nameTable` (character) and
#'   `dataTable` (list-column holding the table).
#' @export
parse_for_tables <-
  function(all_data,
           table_name_initial = "All Filings",
           parse_all_filings = TRUE,
           parse_complete_text_filings = TRUE,
           parse_form_d = TRUE,
           parse_13F = TRUE,
           parse_small_offerings = TRUE,
           parse_form_3_4s = TRUE,
           parse_asset_files = TRUE,
           parse_xbrl = TRUE,
           assign_to_environment = FALSE,
           nest_data = TRUE,
           return_message = TRUE) {
    all_tables <-
      tibble()

    # Every form-specific parse goes through this wrapper so that one failing
    # table yields an empty tibble instead of aborting the whole run.
    parse_form_data_safe <-
      purrr::possibly(.parse_form_data, tibble())

    # The incoming `parse_all_filings` value is replaced: filings are parsed
    # whenever at least one specific table was requested.
    parse_all_filings <-
      c(
        parse_complete_text_filings,
        parse_form_d,
        parse_13F,
        parse_small_offerings,
        parse_form_3_4s,
        parse_asset_files,
        parse_xbrl
      ) %>%
      sum() > 0

    if ('termSearch' %in% names(all_data)) {
      # Search-term level summary; the columns are then removed from the
      # filing-level data.
      df_general <-
        all_data %>%
        select(termSearch, countFilings) %>%
        distinct()

      all_tables <-
        all_tables %>%
        bind_rows(tibble(nameTable = 'Summary', dataTable = list(df_general)))

      all_data <-
        all_data %>% select(-c(termSearch, countFilings))

    } else {
      # Placeholder; dropped later by the zero-column filter.
      all_tables <-
        all_tables %>%
        bind_rows(tibble(nameTable = 'Summary', dataTable = list(tibble())))
    }

    if (parse_all_filings) {
      all_data <-
        all_data %>%
        select(-dplyr::matches(
          "hasAssetFile|isFormD|is13F|isForm3_4|hasSmallOfferingData"
        )) %>%
        distinct()

      if (!'typeFile' %in% names(all_data)) {
        all_data <-
          all_data %>%
          mutate(typeFile = ifelse(urlSECFilingDirectory %>% str_detect('htm'),
                                   'html', NA))
      }

      search_df <-
        all_data %>%
        select(dateFiling,
               dplyr::matches("typeFile"),
               dplyr::matches("idForm"),
               urlSECFilingDirectory) %>%
        distinct()

      # Parse the filing index page of every distinct filing directory.
      df_all_filings <-
        search_df$urlSECFilingDirectory %>%
        unique() %>%
        future_map_dfr(function(x){
          .parse_sec_filing_index(urls = x)
        })



      df_all_filings <-
        df_all_filings %>%
        nest(-c(idCIK, urlSECFilingDirectory, dplyr::matches("idAccession")), .key = dataFilings)

      all_data <-
        all_data %>%
        select(-dplyr::matches("dataFilings")) %>%
        left_join(df_all_filings %>% select(-one_of(c('idCIK', 'idAccession')))) %>%
        mutate(hasNoFilings = dataFilings %>% map_lgl(is_null)) %>%
        suppressMessages()

      all_tables <-
        all_tables %>%
        bind_rows(tibble(nameTable = table_name_initial, dataTable = list(all_data)))

      .all_filings <-
        all_data %>%
        filter(!hasNoFilings) %>%
        select(idCIK:typeFile, dataFilings)

      if (!'idCIKFiler' %in% names(.all_filings)) {
        .all_filings <-
          .all_filings %>%
          dplyr::rename(idCIKFiler = idCIK)
      }

      if (!'typeFileFiler' %in% names(.all_filings)) {
        .all_filings <-
          .all_filings %>%
          dplyr::rename(typeFileFiler = typeFile)
      }

      .all_filings <-
        .all_filings %>%
        select(dplyr::matches("idCIK|data")) %>%
        unnest() %>%
        distinct()

      all_tables <-
        all_tables %>%
        bind_rows(tibble(nameTable = 'All Filing URLS', dataTable = list(.all_filings)))

      if (parse_complete_text_filings) {
        if (!'urlTextFilingFull' %in% names(all_data)) {
          all_data <-
            all_data %>%
            mutate(urlTextFilingFull = urlSECFilingDirectory %>% str_replace_all("-index.htm", ".txt"))
        }
        urls <-
          all_data$urlTextFilingFull %>%
          unique()
        sec_complete_filings_safe <-
          purrr::possibly(.sec_complete_filings, tibble())
        # Use the safe wrapper (it was previously built but never called) so
        # a failed text download does not abort the other tables.
        all_text_df <-
          sec_complete_filings_safe(urls = urls)

        all_tables <-
          all_tables %>%
          bind_rows(tibble(nameTable = 'Text Filings', dataTable = list(all_text_df)))
      }

      if (parse_form_d) {
        df_form_ds <-
          .all_filings %>%
          parse_form_data_safe(filter_parameter = 'isFormD')
        all_tables <-
          all_tables %>%
          bind_rows(tibble(nameTable = 'FormDs', dataTable = list(df_form_ds)))
      }

      if (parse_13F) {
        df_13F <-
          .all_filings %>%
          parse_form_data_safe(filter_parameter = 'is13FFiling')
        all_tables <-
          all_tables %>%
          bind_rows(tibble(nameTable = '13Fs', dataTable = list(df_13F)))
      }

      if (parse_small_offerings) {
        df_small_offerings <-
          .all_filings %>%
          parse_form_data_safe(filter_parameter = 'hasSmallOfferingData')
        all_tables <-
          all_tables %>%
          bind_rows(tibble(
            nameTable = 'Small Offerings',
            dataTable = list(df_small_offerings)
          ))
      }

      if (parse_form_3_4s) {
        df_form3_4 <-
          .all_filings %>%
          parse_form_data_safe(filter_parameter = 'isForm3_4')

        all_tables <-
          all_tables %>%
          bind_rows(tibble(nameTable = 'Form 3 and 4', dataTable = list(df_form3_4)))
      }

      if (parse_asset_files) {
        # Same safe wrapper as the sibling tables.
        df_assets <-
          .all_filings %>%
          parse_form_data_safe(filter_parameter = 'hasAssetFile')

        all_tables <-
          all_tables %>%
          bind_rows(tibble(nameTable = 'Asset Data', dataTable = list(df_assets)))
      }

      if (parse_xbrl) {
        df_xbrl <-
          .all_filings %>%
          parse_form_data_safe(filter_parameter = 'isXBRLInstanceFile')

        all_tables <-
          all_tables %>%
          bind_rows(tibble(nameTable = 'XBRL', dataTable = list(df_xbrl)))
      }

    } else {
      all_tables <-
        all_tables %>%
        bind_rows(tibble(nameTable = 'TermsFilings', dataTable = list(all_data)))

    }

    # Drop tables that ended up with no columns (failed or empty parses).
    all_tables <-
      all_tables %>%
      mutate(countCols = dataTable %>% map_dbl(ncol)) %>%
      filter(countCols > 0) %>%
      select(-countCols)

    if (assign_to_environment) {
      table_name_df <-
        all_tables %>%
        select(nameTable) %>%
        distinct() %>%
        mutate(
          nameDF =
            list('data', nameTable %>% str_replace_all('\\ ', ''), 'EDGAR') %>% purrr::invoke(paste0, .)
        )

      # seq_len() keeps this a no-op when no table survived the filter
      # (1:0 would iterate over c(1, 0)).
      seq_len(nrow(table_name_df)) %>%
        walk(function(x) {
          df_data <-
            all_tables %>%
            slice(x) %>%
            select(dataTable) %>%
            unnest()

          df_name <-
            table_name_df %>% slice(x) %>% .$nameDF

          # Apply display formatting by column-name prefix and drop columns
          # that are entirely NA before exporting.
          df_data <-
            df_data %>%
            mutate_at(.vars =
                        df_data %>% select(dplyr::matches("^amount|^price|^value")) %>% names(),
                      funs(. %>% formattable::currency(digits = 2))) %>%
            mutate_at(
              .vars =
                df_data %>% select(dplyr::matches("^count[A-Z]|^number")) %>% select(-dplyr::matches("country")) %>% names(),
              funs(. %>% as.numeric() %>%  formattable::comma(digits = 0))
            ) %>%
            mutate_at(
              .vars = df_data %>% select(dplyr::matches("^percent|^pct")) %>% select(-dplyr::matches("country")) %>% names(),
              funs(. %>% as.numeric() %>% formattable::percent(digits = 0))
            ) %>%
            select(which(colMeans(is.na(.)) < 1)) %>%
            tidy_column_formats()

          assign(x = df_name,
                 eval(df_data),
                 envir = .GlobalEnv)
        })
    }
    return(all_tables)
  }


# SEC Free Text Search ----------------------------------------------------


# Build the paginated SEC full-text-search result URLs for one search term.
#
# Reads the first result page to find the total hit count, then produces one
# URL per block of 100 results (startDoc = 0, 100, 200, ...).
# Returns a tibble with `termSearch` and `urlSECSearch`.
.generate_ft_search_urls <-
  function(search_term = c('"Rockwood Capital"'),
           return_message = TRUE) {
    encoded_term <- URLencode(search_term)

    # Every URL shares this prefix/suffix; only the startDoc offset varies.
    url_prefix <-
      "https://searchwww.sec.gov/EDGARFSClient/jsp/EDGAR_MainAccess.jsp?search_text="
    url_suffix <-
      "&numResults=100&isAdv=true&formType=1&fromDate=mm/dd/yyyy&toDate=mm/dd/yyyy&stemming=true"

    first_page <-
      read_html(paste0(url_prefix, encoded_term, "&sort=Date&startDoc=", "0", url_suffix))

    # Largest number found in the header counter nodes = total hits.
    counter_text <-
      html_text(html_nodes(first_page, '#header .normal+ .normalbold'))
    total_hits <-
      max(readr::parse_number(as.character(counter_text)), na.rm = TRUE)

    offsets <-
      seq(0, by = 100, length.out = ceiling(total_hits / 100))

    result_urls <-
      paste0(url_prefix, encoded_term, "&sort=Date&startDoc=", offsets, url_suffix)

    if (return_message) {
      cat(glue("Found SEC free text urls for {search_term}"))
    }

    tibble(termSearch = search_term, urlSECSearch = result_urls)
  }

# Fetch SEC full-text-search result pages concurrently and parse each one.
#
# Every URL is queued with curl_fetch_multi(); the `success` callback parses
# a fetched page and appends its rows to the accumulator `df`, which is
# returned after multi_run() has processed the queue.
.parse_ft_filing_page <-
  function(urls, return_message = TRUE) {
    df <- tibble()

    # Called once per successfully fetched page.
    success <- function(res) {
      if (return_message) {
        cat(paste0("Parsing: ", res$url), fill = TRUE)
      }

      page <- read_html(res$content)
      search_url <- res$url

      dates <- lubridate::mdy(html_text(html_nodes(page, 'i.blue')))

      # Links that open a filing; their text and their href are both used.
      filing_links <- html_nodes(page, '.infoBorder+ tr td+ td #viewFiling')

      search_items <- str_to_upper(str_trim(html_text(filing_links)))

      # The href is a javascript call; strip the wrapper and keep the first
      # comma-separated argument, which is the filing URL.
      raw_hrefs <- html_attr(filing_links, 'href')
      raw_hrefs <- str_replace_all(raw_hrefs, "javascript:opennew", '')
      raw_hrefs <- str_replace_all(raw_hrefs, "'|\\(", '')
      urlFiling <- map_chr(raw_hrefs, function(href) {
        str_split(href, '\\,')[[1]][[1]]
      })

      # First path segment after the archive prefix is the filer's CIK.
      ciks <- map_dbl(urlFiling, function(filing_url) {
        archive_path <-
          str_replace_all(filing_url, 'http://www.sec.gov/Archives/edgar/data/', '')
        leading_segment <- str_split(archive_path, '/')[[1]][[1]]
        readr::parse_number(as.character(leading_segment))
      })

      text <- str_to_upper(html_text(html_nodes(page, '.small')))

      row_index <- seq_along(ciks)

      # "<exhibit> OF <form> FOR <filer>" is split into its three parts.
      page_data <-
        tibble(
          dateFiling = dates[row_index],
          idCIKFiler = ciks,
          nameFilerFilingExhibit = search_items,
          descriptionText = text[row_index],
          urlSECFiling = urlFiling
        ) %>%
        tidyr::separate(nameFilerFilingExhibit, sep = '\\ FOR ',
                        into = c('exhibitFiling', 'nameFiler'), remove = FALSE) %>%
        tidyr::separate(exhibitFiling, sep = '\\ OF ',
                        into = c('idExhibit', 'idForm'), remove = TRUE) %>%
        suppressWarnings()

      # When there was no " OF " part the single value is the form, not an
      # exhibit: move it to idForm and blank idExhibit.
      page_data <-
        page_data %>%
        mutate(idForm = ifelse(is.na(idForm), idExhibit, idForm),
               idExhibit = ifelse(idForm == idExhibit, NA, idExhibit),
               urlSECSearch = search_url) %>%
        select(dateFiling, idCIKFiler, nameFiler, idForm, idExhibit, everything()) %>%
        suppressWarnings() %>%
        suppressMessages()

      df <<- bind_rows(df, page_data)
    }

    # Failed fetches contribute no rows.
    failure <- function(msg) {
      tibble()
    }

    for (target_url in urls) {
      curl_fetch_multi(url = target_url, success, failure)
    }
    multi_run()
    df
  }

#' SEC EDGAR free text search for one or more terms
#'
#' Generates the paginated full-text-search URLs for every term, parses all
#' result pages and returns the matching filings, one row per search hit.
#'
#' @param search_terms Character vector of search terms; wrap a term in
#'   double quotes to search for the exact phrase.
#' @param include_counts If `TRUE`, look up a filing count for every filer
#'   CIK in the results and add it as `countFilings`.
#' @param nest_data If `TRUE`, nest the results by `termSearch` into a
#'   `dataFilings` list-column.
#' @param return_message If `TRUE`, print progress and a per-term summary.
#' @import dplyr purrr curl formattable tidyr stringr lubridate rvest httr xml2 jsonlite readr stringi
#' @return A tibble of filings (nested by term when `nest_data = TRUE`); an
#'   empty tibble when no result page could be generated or parsed.
#' @export
#' @examples
#' edgar_ft_terms(search_terms = c('"Jared Kushner"', '"EJF Capital"', '"Blackstone Real Estate"'))

edgar_ft_terms <-
  function(search_terms = c('"Jared Kushner"', '"EJF Capital"', '"Blackstone Real Estate"'),
           include_counts = FALSE,
           nest_data = FALSE,
           return_message = TRUE) {
    .generate_ft_search_urls_safe <-
      purrr::possibly(.generate_ft_search_urls, tibble())

    # return_message is forwarded so that return_message = FALSE also
    # silences the per-term URL message.
    df_urls <-
      search_terms %>%
      future_map_dfr(function(x) {
        .generate_ft_search_urls_safe(search_term = x, return_message = return_message)
      })

    # Nothing to fetch: every term failed or produced no URLs.
    if (nrow(df_urls) == 0) {
      return(tibble())
    }

    all_data <-
      .parse_ft_filing_page(urls = df_urls$urlSECSearch, return_message = return_message)

    # No page yielded rows; joining a column-less tibble would error.
    if (nrow(all_data) == 0) {
      return(tibble())
    }

    all_data <-
      all_data %>%
      left_join(df_urls, by = "urlSECSearch") %>%
      select(termSearch, everything()) %>%
      arrange(desc(termSearch), desc(dateFiling)) %>%
      find_target_filings()

    if (include_counts) {
      .cik_filing_count_safe <-
        purrr::possibly(.cik_filing_count, tibble())
      df_counts <-
        all_data %>%
        pull(idCIKFiler) %>%
        unique() %>%
        future_map_dfr(function(x){
          .cik_filing_count_safe(cik = x)
        })

      # Only join when at least one count lookup succeeded; otherwise the
      # rename/select below would fail on missing columns.
      if (all(c("idCIK", "countFilings") %in% names(df_counts))) {
        all_data <-
          all_data %>%
          left_join(df_counts %>%
                      dplyr::rename(idCIKFiler = idCIK) %>%
                      select(idCIKFiler, countFilings)) %>%
          select(termSearch:idCIKFiler, countFilings, everything()) %>%
          suppressMessages() %>%
          arrange(dateFiling, termSearch)
      }
    }

    if (return_message) {
      # One "term: n" line per search term.
      results <-
        all_data %>%
        count(termSearch) %>%
        mutate(n = n %>% formattable::comma(digits = 0)) %>%
        unite(termMessage, termSearch, n, sep = ': ') %>%
        .$termMessage

      list(
        "\nSEC free text search filing mentions in the last 4 years:\n",
        results %>% paste0(collapse = '\n')
      ) %>%
        purrr::reduce(paste0) %>% cat(fill = TRUE)
    }

    if (nest_data) {
      all_data <-
        all_data %>%
        nest(-c(termSearch), .key = dataFilings)
    }

    all_data
  }

# Boolean Archive Search --------------------------------------------------
# Lookup table of the supported EDGAR boolean-search fields.
#
# Returns a tibble with the human-readable field name (`nameParameter`) and
# the slug used for it in a search query (`slugParameter`). Two aliases
# ("Company", "form") map to the same slugs as "Company Name" / "Form Type".
.sec_parameter_df <- function() {
  # display name = query slug
  slug_by_name <- c(
    "Company Name" = "company-name",
    "Company CIK" = "company-cik",
    "Public Document Count" = "Public-Document-Count",
    "Accession Number" = "Accession-Number",
    "Form Type" = "type",
    "Period" = "period",
    "Filing Date" = "Filing-Date",
    "Company Name Confirmed" = "Company-Name-Confirmed",
    "CIK" = "cik",
    "SIC" = "ASSIGNED-SIC",
    "IRS Number" = "irs-number",
    "State of Incorporation" = "STATE-OF-INCORPORATION",
    "Fiscal Year End" = "Fiscal-Year-End",
    "Form Type Exact" = "Form-Type",
    "SEC Act" = "Act",
    "File Number" = "File-Number",
    "Business Address" = "Business-Address",
    "Mailing Address" = "Mailing-Address",
    "Former Company Name" = "FORMER-CONFORMED-NAME",
    "Date of Company Name Change" = "DATE-CHANGED",
    "Company" = "company-name",
    "form" = "type"
  )

  tibble(
    nameParameter = names(slug_by_name),
    slugParameter = unname(slug_by_name)
  )
}


# https://www.sec.gov/edgar/searchedgar/edgarzones.htm
# https://www.sec.gov/cgi-bin/srch-edgar?ASSIGNED-SIC%3D0800&first=1994&last=2017
# https://www.sec.gov/cgi-bin/srch-edgar?text=Rockwood&start=1901&count=100&first=1994&last=2017
# https://www.sec.gov/edgar/searchedgar/search_help.htm
# Fetch EDGAR boolean-search (srch-edgar) result pages concurrently and parse
# each into one row per listed filing.
#
# urls:           character vector of search-result page URLs.
# return_message: if TRUE, print each URL as it is parsed.
#
# Every URL is queued with curl_fetch_multi(); the `success` callback parses
# a fetched page and appends its rows to the accumulator `df`, which is
# returned after multi_run() has processed the queue. Pages with no result
# links and failed fetches contribute no rows.
.parse_boolean_search_page <-
  function(urls, return_message = TRUE) {
    df <-
      tibble()
    success <- function(res){
      if (return_message) {
        list("Parsing: ", res$url, "\n") %>% purrr::reduce(paste0) %>% cat(fill = T)
      }
      page <-
        res$content %>%
        read_html()

      # Result rows carry the entity link in the second table cell.
      link_selector <- 'div td:nth-child(2) a'

      # If the fetched body has no result links, re-read the page directly
      # from its URL before giving up.
      use_url <-
        page %>%
        html_nodes(link_selector) %>%
        length() == 0
      if (use_url) {
        page <-
          res$url %>%
          read_html()

      }

      result_links <-
        page %>%
        html_nodes(link_selector)

      entities <-
        result_links %>%
        html_text() %>%
        str_to_upper()

      stems <-
        result_links %>%
        html_attr('href')

      # No results on this page: skip it. (The previous fallback built a
      # placeholder row from an undefined `x` and could not be combined with
      # the empty columns parsed below.)
      if (length(stems) == 0) {
        return(invisible(NULL))
      }

      data <-
        seq_along(stems) %>%
        future_map_dfr(function(x){
          stem <-
            stems[[x]]

          url_filing <-
            'https://www.sec.gov' %>% paste0(stem)

          # Path after the archive prefix: first segment is the CIK, last
          # segment is the accession file name.
          items <-
            stem %>%
            str_replace_all('/Archives/edgar/data/','') %>%
            str_split('/') %>%
            flatten_chr()

          cik <-
            items[[1]] %>% as.numeric()

          accession <-
            items[length(items)]

          # Dots escaped so only a real .htm/.html extension matches.
          is_html <-
            accession %>% str_detect("\\.html?")

          tibble(idRow = x, idCIK = cik,
                 isHTML = is_html,
                 slugAccension = accession,
                 urlSECFilingDirectory = url_filing)

        })

      form <-
        page %>%
        html_nodes('td:nth-child(4)') %>%
        html_text() %>%
        str_to_upper()

      # One extra leading cell (presumably a header) is dropped when the
      # counts do not line up.
      if (!length(form) == nrow(data)) {
        form <-
          form[2:length(form)]
      }

      date_filing <-
        page %>%
        html_nodes('td:nth-child(5)') %>%
        html_text() %>%
        lubridate::mdy()

      file_size <-
        page %>%
        html_nodes('td:nth-child(6)') %>%
        html_text() %>%
        as.character() %>%
        readr::parse_number()

      data <-
        data %>%
        mutate(
          nameEntityLegal = entities,
          idForm = form,
          dateFiling = date_filing,
          sizeFile = file_size
        ) %>%
        resolve_legal_name() %>%
        select(-idRow) %>%
        select(dateFiling, idCIK, nameEntity, idForm, everything()) %>%
        find_target_filings()
      search_url <-
        res$url

      # Split "<accession>.<ext>" and then the accession number itself into
      # its three dash-separated numeric parts.
      data <-
        data %>%
        separate(slugAccension,
                 sep = '\\.',
                 into = c('idAccession', 'typeFile'),
                 extra = "merge",
                 fill = "right"
        ) %>%
        mutate(idAccession = idAccession %>% str_replace_all('-index', '')) %>%
        separate(
          idAccession,
          into = c('idCIKFilerSubmission', 'codeYear', 'countFilerYearFilings'),
          sep = '\\-',
          remove = FALSE
        ) %>%
        mutate_at(
          c('idCIKFilerSubmission', 'codeYear', 'countFilerYearFilings'),
          funs(. %>% as.character() %>% readr::parse_number())
        ) %>%
        suppressMessages() %>%
        suppressWarnings() %>%
        mutate(
          urlSECSearch = search_url,
          isSameFiler = idCIK == idCIKFilerSubmission,
          urlTextFilingFull = ifelse(
            typeFile %>% str_detect('htm'),
            urlSECFilingDirectory %>% str_replace_all("-index.htm", ".txt"),
            urlSECFilingDirectory
          )
        )

      df <<-
        df %>%
        bind_rows(data)
    }
    # Failed fetches contribute no rows.
    failure <- function(msg){
      tibble()
    }
    urls %>%
      walk(function(x){
        curl_fetch_multi(url = x, success, failure)
      })
    multi_run()
    df
  }



# Build one EDGAR boolean-search (srch-edgar) URL.
#
# search_term: term to search for; character terms are URL-encoded, numeric
#              terms (integer or double) are used as-is.
# parameter:   optional field name from .sec_parameter_df() (matched
#              case-insensitively); the query then becomes "<slug>=<term>".
# year_start:  first year to search; defaults to 1994.
# year_end:    last year to search; defaults to the current year.
# page_start:  offset of the first result (pages hold 100 results).
#
# Returns the URL as a character string. Stops when `search_term` is empty or
# `parameter` is not a known field name.
.generate_edgar_search_url <-
  function(search_term = '"Corona Virus"',
           parameter = NULL,
           year_start = NULL,
           year_end = NULL,
           page_start = 0) {
    if (length(search_term) == 0) {
      stop("Please enter a search term")
    }

    base <-
      'https://www.sec.gov/cgi-bin/srch-edgar?text='

    # is.numeric() also covers integer input; class(x) != "numeric" sent
    # integers to URLencode(), which only accepts character input.
    if (is.numeric(search_term)) {
      term <- search_term
    } else {
      term <- utils::URLencode(search_term)
    }

    # URLencode() leaves "=" alone by default, so encode it explicitly.
    term <- gsub("=", "%3D", term, fixed = TRUE)

    if (length(parameter) > 0) {
      df_params <- .sec_parameter_df()
      valid_names <- tolower(df_params$nameParameter)
      valid_slugs <- tolower(df_params$slugParameter)

      parameter <- tolower(parameter)

      if (!all(parameter %in% valid_names)) {
        stop(
          paste0(
            "SEC boolean search parameters can only be\n",
            paste0(valid_names, collapse = '\n')
          )
        )
      }

      param_slug <- valid_slugs[valid_names == parameter]

      if (parameter %in% c('cik', 'company cik')) {
        term <- pad_cik(term)
      }

      if (grepl('date', parameter, fixed = TRUE)) {
        # Normalise the date term to YYYYMMDD. The previous code called
        # lubridate::ymd() without passing `term`, so this branch never
        # produced a usable value.
        term <- gsub("-", "", as.character(lubridate::ymd(term)), fixed = TRUE)
      }

      if (parameter == 'sic') {
        term <- pad_sic(term)
      }

      slug_term <- paste0(param_slug, '%3D', term)
    } else {
      slug_term <- term
    }

    if (length(year_start) == 0) {
      year_start <- 1994
    }

    if (length(year_end) == 0) {
      year_end <- as.numeric(format(Sys.Date(), "%Y"))
    }

    paste0(
      base, slug_term, '&start=', page_start, '&count=100',
      '&first=', year_start, '&last=', year_end
    )
  }

# Build every paginated EDGAR boolean-search URL for one search term.
#
# Reads the first result page to get the total filing count, prints that
# count, and returns a tibble with `termSearch`, `countFilings` and one
# `urlSECSearch` per page of 100 results (always at least one URL).
.generate_search_term_urls <-
  function(search_term = c('"Rockwood Capital"'),
           parameter = NULL,
           year_start = NULL,
           year_end = NULL){
    # All URLs differ only in their result offset.
    url_for_offset <- function(offset) {
      .generate_edgar_search_url(
        search_term = search_term,
        parameter = parameter,
        year_start = year_start,
        year_end = year_end,
        page_start = offset
      )
    }

    first_page <- read_html(url_for_offset(0))

    # Total number of matching filings as reported on the page.
    count_text <- html_text(html_nodes(first_page, css = 'p+ b'))
    filings <- readr::parse_number(as.character(count_text))

    search_message <-
      if (length(parameter) == 0) {
        search_term
      } else {
        paste0(parameter, ' = ', search_term)
      }

    cat(
      paste0(
        '\n',
        formattable::comma(filings, digits = 0),
        " total filings for search term: ",
        search_message,
        ' to parse'
      ),
      fill = TRUE
    )

    # One offset per 100 results; fall back to the first page when the
    # sequence is empty.
    offsets <- seq(0, by = 100, length.out = ceiling(filings / 100))
    if (length(offsets) == 0) {
      offsets <- 0
    }

    page_urls <- vapply(offsets, url_for_offset, character(1), USE.NAMES = FALSE)

    tibble(
      termSearch = search_term,
      countFilings = filings,
      urlSECSearch = page_urls
    )
  }

# Run one EDGAR boolean search and return all matching filings.
#
# search_term, parameter, year_start, year_end: forwarded to
#   .generate_search_term_urls() to build the result-page URLs.
# return_message: if TRUE, print per-page progress and a final count.
#
# Returns the parsed filings joined with the search term and its total filing
# count, newest filing first; an empty tibble when no filings were parsed.
.sec_search_term <-
  function(search_term = "Boston Properties",
           parameter = NULL,
           year_start = NULL,
           year_end = NULL,
           return_message = TRUE){
    url_df <-
      .generate_search_term_urls(
        search_term = search_term,
        parameter = parameter,
        year_start = year_start,
        year_end = year_end
      )

    # return_message is forwarded so that return_message = FALSE also
    # silences the per-page "Parsing:" output.
    all_data <-
      .parse_boolean_search_page(
        urls = url_df$urlSECSearch,
        return_message = return_message
      )

    has_filings <- nrow(all_data) > 0

    if (has_filings) {
      all_data <-
        all_data %>%
        left_join(url_df, by = "urlSECSearch") %>%
        select(termSearch, countFilings, everything())
    }

    if (return_message) {
      list(
        "\nFound ",
        all_data %>% nrow() %>% formattable::comma(digits = 0),
        ' SEC filings for ',
        search_term,
        '\n'
      ) %>%
        purrr::reduce(paste0) %>%
        cat(fill = TRUE)
    }

    # Nothing parsed: the join/sort columns do not exist, so stop here.
    if (!has_filings) {
      return(tibble())
    }

    all_data %>%
      arrange(desc(dateFiling))
  }


#' EDGAR Search for Terms
#'
#' Runs an EDGAR boolean search for every term in `search_terms`, combines
#' the results, and passes them to `parse_for_tables()` to build the
#' requested tables. Optionally assigns each table into the global
#' environment.
#'
#' @param search_terms Vector of search terms; each term is searched
#'   separately.
#' @param parameter Optional search field name, forwarded to the per-term
#'   search helper.
#' @param year_start First year to search, forwarded to the per-term search
#'   helper.
#' @param year_end Last year to search, forwarded to the per-term search
#'   helper.
#' @param table_name_initial Forwarded to `parse_for_tables()`.
#' @param parse_all_filings Forwarded to `parse_for_tables()`.
#' @param parse_complete_text_filings Forwarded to `parse_for_tables()`.
#' @param parse_form_d Forwarded to `parse_for_tables()`.
#' @param parse_13F Forwarded to `parse_for_tables()`.
#' @param parse_small_offerings Forwarded to `parse_for_tables()`.
#' @param parse_form_3_4s Forwarded to `parse_for_tables()`.
#' @param parse_asset_files Forwarded to `parse_for_tables()`.
#' @param parse_xbrl Forwarded to `parse_for_tables()`.
#' @param assign_to_environment If `TRUE`, assign every resulting table into
#'   the global environment under the name `dataFiler<TableName>` (spaces
#'   removed from the table name). Not forwarded to `parse_for_tables()`.
#' @param nest_data Forwarded to `parse_for_tables()`.
#' @param return_message Forwarded to `parse_for_tables()`.
#'
#' @return A tibble with `nameTable` and a `dataTable` list-column, including
#'   a final `"Search Filings"` row holding the raw search results. Returns
#'   an empty tibble when the search finds nothing, and the raw search
#'   results when table parsing yields no tables.
#' @export
#' @import dplyr purrr curl formattable tidyr stringr lubridate rvest httr xml2 jsonlite readr stringi XBRL jsonlite
#' @importFrom jsonlite fromJSON
#' @examples
#' edgar_search_terms(search_terms = "China", year_start = 2020)

edgar_search_terms <-
  function(search_terms = NULL,
           parameter = NULL,
           year_start = NULL,
           year_end = NULL,
           table_name_initial = "All Filings",
           parse_all_filings = TRUE,
           parse_complete_text_filings = FALSE,
           parse_form_d = FALSE,
           parse_13F = FALSE,
           parse_small_offerings = FALSE,
           parse_form_3_4s = FALSE,
           parse_asset_files = FALSE,
           parse_xbrl = FALSE,
           assign_to_environment = TRUE,
           nest_data = TRUE,
           return_message = TRUE) {

    # A failing search for one term yields an empty tibble instead of an error.
    sec_search_term_safe <-
      purrr::possibly(.sec_search_term, tibble())

    # Search every term, then drop the per-page search URL and de-duplicate.
    all_data <-
      search_terms %>%
      future_map_dfr(function(x) {
        sec_search_term_safe(
          search_term = x,
          parameter = parameter,
          year_start = year_start,
          year_end = year_end
        )
      }) %>%
      dplyr::select(-dplyr::matches("urlSECSearch")) %>%
      distinct()


    # Nothing found for any term.
    if (all_data %>% nrow() == 0) {
      return(tibble())
    }

    # Table parsing is best-effort: any error yields an empty tibble.
    parse_for_tables_safe <-
      purrr::possibly(parse_for_tables, tibble())

    # assign_to_environment is deliberately not passed on; assignment is
    # handled below with this function's own naming scheme.
    all_tables <-
      parse_for_tables_safe(
        all_data = all_data,
        table_name_initial = table_name_initial,
        parse_all_filings = parse_all_filings,
        parse_complete_text_filings = parse_complete_text_filings,
        parse_form_d = parse_form_d,
        parse_13F = parse_13F,
        parse_small_offerings = parse_small_offerings,
        parse_form_3_4s = parse_form_3_4s,
        parse_asset_files = parse_asset_files,
        parse_xbrl = parse_xbrl,
        nest_data = nest_data,
        return_message = return_message
      )

    # No tables could be built: fall back to the raw search results.
    if (all_tables %>% nrow() == 0) {
      return(all_data)
    }

    all_tables <-
      all_tables %>%
      bind_rows(tibble(nameTable = 'Search Filings', dataTable = list(all_data)))

    if (assign_to_environment) {
      # Global variable name for each table: "dataFiler" + table name without spaces.
      table_name_df <-
        all_tables %>%
        select(nameTable) %>%
        distinct() %>%
        mutate(
          nameDF =
            list('dataFiler', nameTable %>% str_replace_all('\\ ', '')) %>% purrr::invoke(paste0, .)
        )

      1:nrow(table_name_df) %>%
        walk(function(x) {
          df_name <-
            table_name_df %>% slice(x) %>% .$nameDF

          # Unnest the table stored for this name.
          df_data <-
            all_tables %>%
            filter(nameTable == table_name_df$nameTable[[x]]) %>%
            select(dplyr::matches(c('idCIK|nameEntity|dataTable'))) %>%
            unnest() %>%
            suppressWarnings()

          # More than one column whose name contains "data": each such column
          # is exported as its own global table.
          has_unnest <-
            names(df_data) %>% str_detect('data') %>% sum(na.rm = TRUE) > 1

          if (has_unnest) {
            base_names <-
              df_data %>% select(-dplyr::matches("data")) %>% names()

            df_data_names <-
              names(df_data)[names(df_data) %>% str_detect('data')]

            for (df_data_name in df_data_names) {
              # Base columns plus this one "data" column, unnested, with
              # all-NA columns removed.
              table <-
                df_data %>%
                select(one_of(c(base_names, df_data_name))) %>%
                unnest() %>%
                select(which(
                  colMeans(is.na(.)) < 1
                ))

              # Variable name: table name + column name with "data" removed.
              df_table_name <-
                list(df_name, df_data_name %>% str_replace_all('data', '')) %>% purrr::reduce(paste0)
              assign(x = df_table_name,
                     eval(table),
                     envir = .GlobalEnv)
            }

          } else {
            # Zero or one column whose name contains "data".
            has_unnest <-
              df_data %>% names() %>% str_detect('data') %>% sum(na.rm = TRUE) > 0
            if (has_unnest) {
              df_data <-
                df_data %>%
                unnest()

              # Keep only the first occurrence of each column name.
              select_cols <- tibble(nameData = names(df_data)) %>%
                mutate(idColumn = 1:n()) %>%
                group_by(nameData) %>%
                mutate(countColumn = 1:n()) %>%
                ungroup() %>%
                filter(countColumn == min(countColumn)) %>%
                .$idColumn

              df_data <-
                df_data[, select_cols]

              # Drop columns that are entirely NA before exporting.
              table <-
                df_data %>%
                select(which(
                  colMeans(is.na(.)) < 1
                ))
              assign(x = df_name,
                     eval(table),
                     envir = .GlobalEnv)
            } else {
              # Plain table: drop all-NA columns and export as-is.
              table <-
                df_data %>%
                select(which(
                  colMeans(is.na(.)) < 1
                ))
              assign(x = df_name,
                     eval(table),
                     envir = .GlobalEnv)
            }
          }
        })
    }
    return(all_tables)
  }



# Most Recent Filings -----------------------------------------------------

# Parse one "current filings" listing page for a form type.
#
# url:            listing page to read.
# return_message: if TRUE, print the URL once it has been parsed.
#
# Returns a tibble with dateFiling, idForm, idCIK and nameFiler (upper-cased).
# When the page holds exactly two links per listed filing, idRow,
# urlSECFilingDirectory and urlCIKFiler are added as well.
.parse_most_recent_filing_form_page <-
  function(url = "https://www.sec.gov/cgi-bin/current?q1=0&q2=6&q3=10-D", return_message = FALSE) {
    page <-
      url %>%
      read_html()

    # The listing is fixed-width text inside <pre>; drop the header line and
    # read the rest as a whitespace-separated table.
    data <-
      page %>%
      html_nodes(css = 'td pre') %>%
      html_text() %>%
      str_replace_all('Date Filed   Form        CIK Code     Company Name','') %>%
      read_table(col_names = FALSE) %>%
      purrr::set_names(c('dateFiling', 'idForm', 'idCIK', 'nameFiler')) %>%
      mutate(nameFiler = nameFiler %>% str_to_upper()) %>%
      suppressWarnings() %>%
      suppressMessages()

    urls <-
      page %>%
      html_nodes('pre a') %>%
      html_attr('href') %>%
      paste0('https://www.sec.gov',.)

    # Links are only attached when there are exactly two per row.
    data_match <-
      urls %>% length() / 2 == (nrow(data))
    if (data_match) {
      # Links alternate per row: presumably the first is the filing (form
      # link) and the second the filer (CIK link) -- verify against the live
      # page. Previously both columns were taken from the second link, so the
      # filing directory duplicated the filer URL.
      df_links <-
        tibble(
          urlCIKFiler = urls[c(FALSE, TRUE)] %>% paste0('&start=0&count=100'),
          urlSECFilingDirectory = urls[c(TRUE, FALSE)]
        ) %>%
        mutate(idRow = 1:n())

      data <-
        data %>%
        mutate(idRow = 1:n()) %>%
        left_join(df_links) %>%
        suppressWarnings() %>%
        suppressMessages()
    }

    if (return_message) {
      list("Parsed: ", url) %>%
        purrr::invoke(paste0, .) %>% cat(fill = TRUE)
    }
    data
  }

.parse_most_recent_stream <-
  function(url = "https://www.sec.gov/cgi-bin/browse-edgar?company=&CIK=&type=&owner=include&count=100&action=getcurrent",
           return_message = TRUE) {
    cookies = c(
      'ak_bmsc' = 'BA09B9B7E282820D33A847AB6A4F577C~000000000000000000000000000000~YAAQEIXYF9KjfYuAAQAAtA9NkA/Asr69IqJoGtc80iFGaJxeyP/fnqUjhwOkojONIdc3b8zucr8+he1KdbmavGbRp6Bg53ClwHiw0u8kKwNbaufzhSpP74TwvSUfZhrK4Xk1CBHIR1BGudopKt6ds20EpWSCAos3IxzOKIGoGqBOHSIatGivOOcRK/l79CSBT8c7hvz/dF/jvduX42v4cOr5ff1p973FSHh6Pag8I+do5iiJJ9gch1a241qP1XOoz68SyvRYr77owlIoLuSvHiVhJzwVAnu0xZQxGWTWxAjvRPO4oY+p9asvti0DnuqL4VBnBqZFFTuq1Kdj3alR/dtzSU0EhLP7ij8Q6yU/P1jL4hwxnvUWKxSXkNho9DYUrVmOMvk+tYIMvpdj2+yH2fvz6n1JMXt1ovIq27f0SA==',
      'bm_sv' = 'F8C2F590FCEE07B36615FEA14209764D~YAAQEIXYFw2mfYuAAQAAsmpNkA/mkbJEFnhI9pVm9obV/qxGm3cqtAprkBZr42oLuRJF1PuYuEl9UZP/1QFAo0Uzy+Y8qQri6/BCoEczGMA36G2JWqz2WnV44Jux4vUP8sCpnBCeTYRFkAJWS8tpuawox2wws0IanSoClmvxL9eeP0R88baDAhujCikpQhG1/4WH6zUQpVsmStrp3IMBtoOjU5BVjB2W/ql7/Elb9D2dwEqQheWrqQYUorOY~1'
    )

    headers = c(
      `authority` = 'www.sec.gov',
      `accept` = 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9',
      `accept-language` = 'en-US,en;q=0.9',
      `cache-control` = 'no-cache',
      `pragma` = 'no-cache',
      `sec-fetch-dest` = 'document',
      `sec-fetch-mode` = 'navigate',
      `sec-fetch-site` = 'none',
      `sec-fetch-user` = '?1',
      `sec-gpc` = '1',
      `upgrade-insecure-requests` = '1',
      `user-agent` = 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/101.0.4951.54 Safari/537.36'
    )

    res <- httr::GET(url = url, httr::add_headers(.headers=headers), httr::set_cookies(.cookies = cookies))

    page <- res %>% xml2::read_html()

    url_directory <-
      page %>%
      html_nodes('a+ table td:nth-child(2) a:nth-child(1)') %>%
      html_attr(name = 'href') %>%
      paste0('https://www.sec.gov', .)

    urlTextFilingFull <-
      page %>%
      html_nodes('div a+ a') %>%
      html_attr(name = 'href') %>%
      paste0('https://www.sec.gov', .)

    forms <-
      page %>%
      html_nodes('a+ table td:nth-child(1)') %>%
      html_text() %>%
      str_trim()

    forms <-
      forms[!forms == '']

    url_directory <-
      page %>%
      html_nodes('a+ table td:nth-child(2) a:nth-child(1)') %>%
      html_attr('href') %>%
      paste0('https://www.sec.gov',.)

    filing_descriptions <-
      page %>%
      html_nodes('.small') %>%
      html_text() %>%
      str_trim()

    df_descriptions <-
      seq_along(filing_descriptions) %>%
      future_map_dfr(function(x){
        description <-
          filing_descriptions[[x]] %>%
          str_to_upper()

        has_act <-
          description %>% str_detect("ACT:")

        if (has_act) {
          items <-
            description %>%
            str_split("ACCESSION NUMBER: ") %>%
            flatten_chr() %>%
            str_replace_all('\n','')

          filing_description <-
            items[[1]]

          items <-
            items[[2]] %>%
            str_split('ACT:') %>%
            flatten_chr() %>%
            str_trim()

          accession <-
            items[1]

          items <-
            items[[2]] %>%
            str_split("SIZE:") %>%
            flatten_chr() %>%
            str_trim()

          df <-
            tibble(idRow = x,
                   descriptionFiling = filing_description,
                   idAccession = accession,
                   idSECAct = items[[1]],
                   descriptionFileSize = items[[2]])
          return(df)

        }

        items <-
          description %>%
          str_split("ACCESSION NUMBER: ") %>%
          flatten_chr() %>%
          str_replace_all('\n','')

        filing_description <-
          items[[1]]

        items <-
          items[[2]] %>%
          str_split('SIZE:') %>%
          flatten_chr() %>%
          str_trim()

        df <-
          tibble(idRow = x,
                 descriptionFiling = filing_description,
                 idAccession = items[[1]],
                 descriptionFileSize = items[[2]])
        return(df)
      })

    filer_description <-
      page %>%
      html_nodes('td:nth-child(3) a') %>%
      html_text()

    df_filers <-
      seq_along(filer_description) %>%
      future_map_dfr(function(x){
        filer <-
          filer_description[[x]] %>%
          str_to_upper()

        is_messed <-
          filer %>% str_count("\\(") > 2

        if (!is_messed) {
          values <-
            filer %>%
            str_split('\\(') %>%
            flatten_chr() %>%
            str_replace_all('[\\)]','') %>%
            str_trim()

          df <-
            tibble(idRow = x, item = c('nameEntityLegal', 'idCIK', 'typeSECEntity'), value = values) %>%
            spread(item, value) %>%
            mutate(idCIK = idCIK %>% as.numeric())

          df <-
            df %>%
            resolve_legal_name() %>%
            select(idCIK, nameEntity, everything())
          return(df)
        }

        if (is_messed) {
          values <-
            filer %>%
            str_split('\\(') %>%
            flatten_chr() %>%
            str_replace_all('[\\)]','') %>%
            str_trim()

          values <-
            c(list(values[1], values[2]) %>%
                purrr::reduce(paste), values[3], values[4])

          df <-
            tibble(idRow = x, item = c('nameEntityLegal', 'idCIK', 'typeSECEntity'), value = values) %>%
            spread(item, value) %>%
            mutate(idCIK = idCIK %>% as.numeric())

          df <-
            df %>%
            resolve_legal_name() %>%
            select(idCIK, nameEntity, everything())
          return(df)
        }
      })

    url_cik_filer <-
      page %>%
      html_nodes('td:nth-child(3) a') %>%
      html_attr('href') %>%
      paste0('https://www.sec.gov',.) %>%
      paste0(., '&start=0')

    datetime_accepted <-
      page %>%
      html_nodes(css = '.small+ td') %>%
      html_text() %>%
      lubridate::ymd_hms()

    date_filed <-
      page %>%
      html_nodes('td:nth-child(5)') %>%
      html_text() %>%
      lubridate::ymd()

    file_film <- page %>%
      html_nodes('td:nth-child(6)') %>%
      html_text()

    df_films <-
      seq_along(file_film) %>%
      future_map_dfr(function(x){
        parts <-
          file_film[[x]] %>% str_split('\n') %>% flatten_chr()
        tibble(
          idRow = x,
          idFile = parts[[1]] %>% stringr::str_trim(),
          idSECFiling = parts[[2]] %>% as.numeric()
        ) %>%
          mutate(
            urlSECFile = list(
              "https://www.sec.gov/cgi-bin/browse-edgar?action=getcompany&filenum=",
              idFile,
              '&owner=include&count=100000'
            ) %>% purrr::reduce(paste0)
          )
      })

    data <-
      df_filers %>%
      left_join(df_descriptions, by = "idRow") %>%
      select(-idRow) %>%
      mutate(urlTextFilingFull,
             dateFiling = date_filed,
             datetimeAccepted = datetime_accepted,
             idForm = forms,
             urlSECFilingDirectory = url_directory,
             urlCIKFIler = url_cik_filer,
             urlSearch = url) %>%
      select(idForm, everything()) %>%

      find_target_filings()

    if (df_films %>% nrow() == data %>% nrow()) {
      data <-
        data %>%
        mutate(idRow = 1:n()) %>%
        left_join(df_films, by = "idRow") %>%
        select(-idRow)
    }

    if(return_message) {
      list("Parsed: ", url) %>%
        purrr::invoke(paste0, .) %>% cat(fill = T)
    }
    data <-
      data %>%
      mutate_at(
        data %>% select_if(is.character) %>% select(-dplyr::matches("url")) %>% names(),
        funs(ifelse(. == '', NA, .) %>% str_to_upper())
      )
    if ('descriptionFileSize' %in% names(data)) {
      data <-
        data %>%
        mutate(
          typeFileDocument = descriptionFileSize %>% map_chr(stringi::stri_extract_last_boundaries),
          sizeFile = readr::parse_number(as.character(descriptionFileSize)),
          sizeFileBytes = ifelse(typeFileDocument == "MB", sizeFile * 1024, 1048576 * sizeFile)
        ) %>%
        select(-c(typeFileDocument, descriptionFileSize, sizeFile))
    }

    return(data)
  }

# Build the paged EDGAR "latest filings" (`action=getcurrent`) URLs.
#
# Probes up to `pages_out` result pages (100 filings each) with
# `.guess_page_ongoing()` until one of them reports where the listing ends,
# then returns one URL per page up to that point.
#
# All probe state is kept in local variables; nothing is written to or
# removed from the global environment.
#
# @param filing_type Form type used for the `type=` query parameter. `NULL`
#   or "all" (any case) leaves the parameter empty.
# @param pages_out Number of 100-filing pages to probe.
# @return A tibble with `urlPageFiling` and `countPage`, preceded by an
#   `idForm` column when `filing_type` is supplied.
.get_most_recent_filing_urls <-
  function(filing_type = NULL, pages_out = 20) {
    no_filter <-
      length(filing_type) == 0 || str_to_lower(filing_type) == 'all'
    slug_filing <- if (no_filter) '' else filing_type

    build_urls <- function(starts) {
      paste0(
        'https://www.sec.gov/cgi-bin/browse-edgar?action=getcurrent&datea=&dateb=&company=&type=',
        slug_filing,
        '&SIC=&State=&Country=&CIK=&owner=include&accno=&start=',
        starts,
        '&count=100'
      )
    }

    probe_urls <- build_urls(seq(0, by = 100, length.out = pages_out))

    # Probe pages in order and stop at the first one that yields a result.
    df_end <- tibble()
    for (url in probe_urls) {
      df_end <- .guess_page_ongoing(url = url, override = FALSE)
      if (length(df_end) > 0) {
        break
      }
    }

    # Nothing conclusive: re-probe every page in override mode and keep the
    # last row returned.
    if (length(df_end) == 0) {
      df_end <-
        probe_urls %>%
        future_map_dfr(function(x) {
          .guess_page_ongoing(url = x, override = TRUE)
        })
      df_end <-
        df_end %>%
        slice(nrow(df_end))
    }

    # `.guess_page_ongoing()` reports the offset as `countPage` on its
    # fall-through path; normalise to `countStart`.
    if ('countPage' %in% names(df_end)) {
      df_end <-
        df_end %>%
        dplyr::rename(countStart = countPage)
    }

    if (df_end$countStart < 0) {
      df_end <-
        df_end %>%
        mutate(countStart = 0)
    }

    # The unfiltered listing is always read to a fixed depth of 2000 filings.
    if (slug_filing == '') {
      df_end <-
        df_end %>%
        mutate(countStart = 2000)
    }

    # NOTE(review): ceiling(countStart / 100) pages starting at 0 stops one
    # page short of `countStart` itself — confirm whether the page at
    # `start = countStart` should be included.
    page_total <- ceiling(df_end$countStart / 100)
    page_urls <- build_urls(seq(0, by = 100, length.out = page_total))

    df_mr_urls <-
      tibble(urlPageFiling = page_urls) %>%
      mutate(countPage = 1:n())

    if (length(filing_type) > 0) {
      df_mr_urls <-
        df_mr_urls %>%
        mutate(idForm = filing_type) %>%
        select(idForm, everything())
    }

    df_mr_urls
  }


# Fetch and parse every "latest filings" page for one form type.
#
# Both the URL discovery and the per-page parser are wrapped in
# `purrr::possibly()`, so a failure in either yields an empty tibble instead
# of an error.
#
# @param filing_type Form type passed to `.get_most_recent_filing_urls()`;
#   `NULL` requests the unfiltered listing.
# @param return_message If `TRUE`, prints a summary line and lets the page
#   parser print its own progress messages.
# @return A tibble of parsed filings with a leading `idFormName` column
#   ("all" when `filing_type` is `NULL`).
.sec_filing_most_recent <-
  function(filing_type = NULL, return_message = TRUE) {
    get_urls_safe <-
      purrr::possibly(.get_most_recent_filing_urls, tibble())
    parse_stream_safe <-
      purrr::possibly(.parse_most_recent_stream, tibble())

    url_df <- get_urls_safe(filing_type = filing_type)

    filing_name <- if (length(filing_type) == 0) 'all' else filing_type

    all_data <-
      url_df$urlPageFiling %>%
      future_map_dfr(function(x) {
        parse_stream_safe(url = x, return_message = return_message)
      }) %>%
      mutate(idFormName = filing_name) %>%
      select(idFormName, everything())

    if (return_message) {
      # Use `filing_name` so the message reads "all forms" rather than a
      # blank when no form type was supplied.
      list(
        "\nReturned ",
        all_data %>% nrow() %>% formattable::comma(digits = 0),
        ' of the most recent filings from ',
        filing_name,
        ' forms\n'
      ) %>%
        purrr::reduce(paste0) %>%
        cat(fill = TRUE)
    }

    all_data
  }

#' Most recent EDGAR filings by type
#'
#' Retrieves the EDGAR "latest filings" listing for each requested form type,
#' row-binds the results and passes them to \code{parse_for_tables()}.
#' A form type whose retrieval fails contributes no rows instead of aborting
#' the whole call.
#'
#' @param forms character vector of form types; \code{"All"} (any case)
#'   requests the unfiltered listing
#' @param table_name_initial forwarded to \code{parse_for_tables()}
#' @param parse_all_filings forwarded to \code{parse_for_tables()}
#' @param parse_form_d forwarded to \code{parse_for_tables()}
#' @param parse_complete_text_filings forwarded to \code{parse_for_tables()}
#' @param parse_13F forwarded to \code{parse_for_tables()}
#' @param parse_small_offerings forwarded to \code{parse_for_tables()}
#' @param parse_form_3_4s forwarded to \code{parse_for_tables()}
#' @param parse_asset_files forwarded to \code{parse_for_tables()}
#' @param parse_xbrl forwarded to \code{parse_for_tables()}
#' @param assign_to_environment forwarded to \code{parse_for_tables()}
#' @param nest_data forwarded to \code{parse_for_tables()}
#' @param return_message \code{TRUE} prints progress messages while parsing
#'
#' @return the value returned by \code{parse_for_tables()}
#' @export
#' @import dplyr tidyr purrr stringr formattable readr lubridate XBRL curl jsonlite lazyeval
#' @importFrom jsonlite fromJSON
#' @examples
#' \dontrun{
#' edgar_recent_filings(forms = "10-K")
#' }
edgar_recent_filings <-
  function(forms = c("All", "10-D", "10-K"),
           table_name_initial = "Recent Filings",
           parse_all_filings = TRUE,
           parse_form_d = FALSE,
           parse_complete_text_filings = FALSE,
           parse_13F = FALSE,
           parse_small_offerings =  FALSE,
           parse_form_3_4s =  FALSE,
           parse_asset_files = FALSE,
           parse_xbrl =  FALSE,
           assign_to_environment =  TRUE,
           nest_data = FALSE,
           return_message = TRUE) {
    sec_filing_most_recent_safe <-
      purrr::possibly(.sec_filing_most_recent, tibble())

    # One request stream per form type; the safe wrapper keeps a single
    # failing form from discarding the others.
    recent_filings <-
      forms %>%
      future_map_dfr(function(x) {
        message(x)
        sec_filing_most_recent_safe(filing_type = x,
                                    return_message = return_message)
      }) %>%
      select(dplyr::matches("dateFiling"),
             idCIK,
             nameEntity,
             idForm,
             everything())

    # Drop acceptance timestamps and boolean flag columns before handing
    # the table to the downstream parser.
    recent_filings %>%
      select(-dplyr::matches("datetimeAccepted|^is[A-Z]|^has[A-Z]|is13FFiling")) %>%
      parse_for_tables(
        table_name_initial = table_name_initial,
        parse_all_filings = parse_all_filings,
        parse_complete_text_filings = parse_complete_text_filings,
        parse_form_d = parse_form_d,
        parse_13F = parse_13F,
        parse_small_offerings = parse_small_offerings,
        parse_form_3_4s = parse_form_3_4s,
        parse_asset_files = parse_asset_files,
        parse_xbrl = parse_xbrl,
        assign_to_environment = assign_to_environment,
        nest_data = nest_data,
        return_message = return_message
      )
  }

# SEC index logs ----------------------------------------------------------

# List the quarter directories linked from one daily-index year page.
#
# @param url URL of a year directory under
#   "https://www.sec.gov/Archives/edgar/daily-index/".
# @return A tibble with `idQuarter` (number parsed from each link),
#   `yearData` (number parsed from `url`) and `urlQuarter` (`url` followed by
#   the link's href).
.get_year_index_urls <-
  function(url = "https://www.sec.gov/Archives/edgar/daily-index/2016/") {
    yearData <-
      url %>%
      str_replace_all('https://www.sec.gov/Archives/edgar/daily-index|/', '') %>%
      as.character() %>%
      readr::parse_number()

    # Query the page once and reuse the hrefs for both columns.
    quarter_slugs <-
      url %>%
      read_html() %>%
      html_nodes('td a') %>%
      html_attr('href')

    # The previous pattern '\\QTR|/' opened a regex literal quote (`\Q`) and
    # therefore matched nothing; strip the "QTR" prefix and slashes directly.
    quarters <-
      quarter_slugs %>%
      str_replace_all('QTR|/', '') %>%
      as.character() %>%
      readr::parse_number()

    tibble(
      idQuarter = quarters,
      yearData,
      urlQuarter = paste0(url, quarter_slugs)
    )
  }

# List the daily index files linked from one quarter directory page.
#
# Links containing "xml" are ignored. Each remaining file name is split on
# "." into an index type and a date stamp; the date format (year-month-day
# versus month-day-year) is chosen by test-parsing the first file's stamp.
#
# @param url URL of a quarter directory in the EDGAR daily index.
# @param index_type Index type to keep (file-name prefix); a zero-length
#   value falls back to 'master'.
# @param return_message If `TRUE`, prints "Parsed: <url>".
# @return A tibble with `typeIndex`, `dateData`, `urlSECIndex`, `dateIndex`
#   and `urlQuarter`, restricted to the requested index type.
.parse_quarter_urls <-
  function(url = "https://www.sec.gov/Archives/edgar/daily-index/2012/QTR4/",
           index_type = 'master',
           return_message = TRUE) {
    hrefs <-
      url %>%
      read_html() %>%
      html_nodes('td a') %>%
      html_attr('href')
    hrefs <- hrefs[!str_detect(hrefs, "xml")]

    index_tbl <-
      tibble(slugs = hrefs, urlSECIndex = paste0(url, hrefs)) %>%
      tidyr::separate(slugs,
                      into = c('typeIndex', 'dateData', 'remove'),
                      sep = '\\.')

    # Pick the date parser from the first stamp, then apply it to all rows.
    first_is_ymd <- !is.na(lubridate::ymd(index_tbl$dateData[[1]]))
    parse_stamp <- if (first_is_ymd) lubridate::ymd else lubridate::mdy

    index_tbl <-
      index_tbl %>%
      mutate(dateIndex = parse_stamp(dateData)) %>%
      select(-c(remove)) %>%
      mutate(urlQuarter = url)

    wanted_type <- if (length(index_type) == 0) 'master' else index_type
    index_tbl <-
      index_tbl %>%
      filter(typeIndex == wanted_type)

    if (return_message) {
      cat(paste0("Parsed: ", url), fill = TRUE)
    }

    index_tbl
  }

# Parse one EDGAR daily-index file into a tibble of filings.
#
# The file at `url` is read twice: once with `read_lines()` to find the
# line(s) containing "------", and once with `readr::read_table()` skipping
# up to that line. The index type and index date are taken from the file
# name at the end of `url` ("<type>.<date>.idx").
#
# Three result shapes are handled:
#   * a single parsed column, which is split on "|" into five fields;
#   * whitespace-separated columns whose order depends on whether the index
#     type is 'form';
#   * rows whose last field mostly contains ".htm", which are returned early
#     with that field renamed to `urlSECFilingDirectory`.
#
# @param url URL of a daily-index file.
# @param return_message If `TRUE`, prints "Parsed: <url>" before returning.
# @return A tibble with one row per index entry.
.parse_index_filing_page <-
  function(url = "https://www.sec.gov/Archives/edgar/daily-index/1994/QTR3/company.070194.idx",
           return_message = TRUE) {
    # Line number(s) containing "------"; used as the number of lines to skip.
    start_skip <-
      url %>%
      read_lines() %>%
      grep('------', .)

    # Last path component without ".idx", split on "." into type and date.
    url_slug <-
      url %>%
      str_split('\\/') %>%
      flatten_chr() %>%
      .[length(.)] %>%
      str_replace_all('\\.idx', '') %>%
      str_split('\\.') %>%
      flatten_chr()

    index_type <-
      url_slug[[1]]

    # Try year-month-day first ...
    index_date <-
      url_slug[[2]] %>%
      as.numeric() %>%
      lubridate::ymd() %>%
      suppressMessages()

    # ... and fall back to month-day-year when that did not parse.
    if (index_date %>% is.na()) {
      index_date <-
        url_slug[[2]] %>%
        as.numeric() %>%
        lubridate::mdy()
    }

    df <-
      url %>%
      readr::read_table(skip = start_skip,
                        col_names = FALSE,
                        progress = FALSE) %>%
      suppressMessages() %>%
      suppressWarnings()

    # Single-column result: each row is split on "|" into the five fields.
    if (df %>% ncol() == 1) {
      df <-
        df %>%
        separate(X1, sep = '\\|',
                 c(
                   'idCIK',
                   'nameEntityLegal',
                   'idForm',
                   'dateFiling',
                   'slugAccension'
                 ))

      # Upper-case the name, strip "." and ",", and split anything after
      # " /" into `idLocationEntity`.
      # NOTE(review): the ".txt" patterns below are regular expressions with
      # an unescaped "." (matches any character before "txt") — confirm.
      df <-
        df %>%
        mutate(idCIK = idCIK %>% as.numeric()) %>%
        mutate(nameEntity = nameEntityLegal %>% str_to_upper() %>% str_replace_all('\\.|\\,', '') %>% str_trim(),
               dateIndex = index_date,
               typeIndex = index_type) %>%
        separate(nameEntity,
                 sep = '\\ /',
                 into = c('nameEntity', 'idLocationEntity')) %>%
        mutate(
          dateFiling = dateFiling %>% lubridate::ymd(),
          idLocationEntity = idLocationEntity %>% str_replace_all('\\/', '') %>% str_trim()
        ) %>%
        suppressWarnings() %>%
        suppressMessages() %>%
        mutate(
          urlSECFilingText = list("https://www.sec.gov/Archives/", slugAccension) %>%
            purrr::reduce(paste0),
          urlSECFilingDirectory = urlSECFilingText %>% str_replace_all(".txt", '-index.html')
        ) %>%
        select(nameEntity, idLocationEntity, everything()) %>%
        suppressWarnings()

      # Derive the accession id from the slug and split it on "-" into its
      # three numeric parts; flag rows whose first part differs from idCIK.
      df <-
        df %>%
        mutate(
          dataAccension = slugAccension %>% str_replace_all('edgar/data/|.txt', ''),
          urlSECIndex = url
        ) %>%
        tidyr::separate(dataAccension,
                        sep = '\\/',
                        into = c('remove', 'idAccession')) %>%
        select(-remove) %>%
        tidyr::separate(
          idAccession,
          into = c('idCIKFiler', 'codeYear', 'countFilerYearFilings'),
          remove = FALSE,
          sep = '\\-'
        ) %>%
        mutate_at(
          c('idCIKFiler', 'codeYear', 'countFilerYearFilings'),
          funs(. %>% as.numeric())
        ) %>%
        mutate(hasDifferentSECFiler = ifelse(!idCIK == idCIKFiler, TRUE, FALSE)) %>%
        select(
          typeIndex,
          dateIndex,
          dateFiling,
          idCIK,
          nameEntity,
          idForm,
          idAccession,
          countFilerYearFilings,
          hasDifferentSECFiler,
          everything()
        ) %>%
        suppressWarnings() %>%
        suppressMessages()

      if (return_message) {
        list("Parsed: ", url) %>%
          purrr::invoke(paste0, .) %>% cat(fill = T)
      }
      return(df)
    }


    # Multi-column result: column order depends on the index type.
    is_form <-
      index_type == 'form'
    if (is_form) {
      # Six columns: merge X2 and X3 so that five columns remain.
      if (df %>% ncol() == 6) {
        df <-
          df %>%
          tidyr::unite(X2, X2, X3, sep = ' ')
      }

      df <-
        df %>%
        purrr::set_names(c(
          'idForm',
          'nameEntityLegal',
          'idCIK',
          'dateFiling',
          'slugAccension'
        ))
    }

    if (!is_form) {
      # Six columns: merge X1 and X2 so that five columns remain.
      if (df %>% ncol() == 6) {
        df <-
          df %>%
          tidyr::unite(X1, X1, X2, sep = ' ')
      }
      df <-
        df %>%
        purrr::set_names(c(
          'nameEntityLegal',
          'idForm',
          'idCIK',
          'dateFiling',
          'slugAccension'
        ))
    }

    # Same name clean-up and location split as in the single-column branch.
    df <-
      df %>%
      mutate(nameEntity = nameEntityLegal %>% str_to_upper() %>% str_replace_all('\\.|\\,', '') %>% str_trim(),
             dateIndex = index_date,
             typeIndex = index_type) %>%
      separate(nameEntity,
               sep = '\\ /',
               into = c('nameEntity', 'idLocationEntity')) %>%
      mutate(
        dateFiling = dateFiling %>% lubridate::ymd(),
        idLocationEntity = idLocationEntity %>% str_replace_all('\\/', '') %>% str_trim()
      ) %>%
      suppressWarnings() %>%
      suppressMessages()

    # TRUE when more than half of the rows contain ".htm" in the last field
    # (averaged as match count per row).
    is_http <-
      df$slugAccension %>% str_count('\\.htm') %>% sum() / nrow(df) > .5

    # NOTE(review): this early return adds no `urlSECIndex` column, unlike
    # the other two return paths — confirm that callers joining on that
    # column can handle it.
    if (is_http) {
      df <-
        df %>%
        dplyr::rename(urlSECFilingDirectory = slugAccension) %>%
        select(
          typeIndex,
          dateIndex,
          dateFiling,
          idCIK,
          nameEntity,
          everything())

      if (return_message) {
        list("Parsed: ", url) %>%
          purrr::invoke(paste0, .) %>% cat(fill = T)
      }

      return(df)
    }

    # Build the full-text URL from the slug and derive the filing directory
    # URL by replacing the ".txt" pattern with "-index.html".
    df <-
      df %>%
      mutate(
        urlSECFilingText = list("https://www.sec.gov/Archives/", slugAccension) %>%
          purrr::reduce(paste0),
        urlSECFilingDirectory = urlSECFilingText %>% str_replace_all(".txt", '-index.html')
      ) %>%
      select(nameEntity, idLocationEntity, everything()) %>%
      suppressWarnings()

    # Accession id handling, as in the single-column branch (this select
    # does not position `idForm` explicitly).
    df <-
      df %>%
      mutate(
        dataAccension = slugAccension %>% str_replace_all('edgar/data/|.txt', ''),
        urlSECIndex = url
      ) %>%
      tidyr::separate(dataAccension,
                      sep = '\\/',
                      into = c('remove', 'idAccession')) %>%
      select(-remove) %>%
      tidyr::separate(
        idAccession,
        into = c('idCIKFiler', 'codeYear', 'countFilerYearFilings'),
        remove = FALSE,
        sep = '\\-'
      ) %>%
      mutate_at(
        c('idCIKFiler', 'codeYear', 'countFilerYearFilings'),
        funs(. %>% as.numeric())
      ) %>%
      mutate(hasDifferentSECFiler = ifelse(!idCIK == idCIKFiler, TRUE, FALSE)) %>%
      select(
        typeIndex,
        dateIndex,
        dateFiling,
        idCIK,
        nameEntity,
        idAccession,
        countFilerYearFilings,
        hasDifferentSECFiler,
        everything()
      ) %>%
      suppressWarnings() %>%
      suppressMessages()

    if (return_message) {
      list("Parsed: ", url) %>%
        purrr::invoke(paste0, .) %>% cat(fill = T)
    }

    return(df)
  }

# Collect the daily index file URLs for a set of years.
#
# For each year the quarter directories are listed with
# `.get_year_index_urls()`, then every quarter page is parsed with
# `.parse_quarter_urls()` and the two tables are joined on `urlQuarter`.
#
# @param years Years to search.
# @param index_type Index type forwarded to `.parse_quarter_urls()`.
# @param return_message Forwarded to `.parse_quarter_urls()`.
# @return A tibble of index file URLs led by `yearData` and `idQuarter`,
#   without the raw `dateData` stamp.
.get_years_page_urls <-
  function(years = 1994:2017,
           index_type = 'master',
           return_message = TRUE) {

    # NOTE(review): the guard rejects years before 1993 while the message
    # says 1994 — confirm which boundary is intended.
    if (any(years < 1993, na.rm = TRUE)) {
      stop("Years to search start in 1994")
    }

    year_urls <-
      paste0('https://www.sec.gov/Archives/edgar/daily-index/', years, '/')

    df_urls <-
      year_urls %>%
      future_map_dfr(function(x) {
        .get_year_index_urls(url = x)
      })

    all_url_df <-
      df_urls$urlQuarter %>%
      future_map_dfr(function(x) {
        .parse_quarter_urls(url = x,
                            index_type = index_type,
                            return_message = return_message)
      }) %>%
      suppressWarnings()

    # Join on the explicit key so the result does not depend on whichever
    # column names the two tables happen to share.
    all_url_df %>%
      left_join(df_urls, by = "urlQuarter") %>%
      select(yearData, idQuarter, everything()) %>%
      select(-dateData)
  }

#' SEC filing streams
#'
#' This function parses daily
#' SEC filing log starting in 1994
#' for specified periods
#'
#' @param start_date starting date in year-month-date form
#' @param end_date ending date in year-month-date form
#' @param only_most_recent_data \code{TRUE} return only most recent day's filing stream
#' @param index_type type of index to parse, matched against the file-name
#' prefix of each daily index file \itemize{
#' \item \code{master}: parses master log (default)
#' \item \code{company}: parses company log
#' \item \code{form}: parses form log
#' }
#' @param table_name_initial forwarded to \code{parse_for_tables()}
#' @param parse_all_filings forwarded to \code{parse_for_tables()}
#' @param parse_complete_text_filings forwarded to \code{parse_for_tables()}
#' @param parse_form_d forwarded to \code{parse_for_tables()}
#' @param parse_13F forwarded to \code{parse_for_tables()}
#' @param parse_small_offerings forwarded to \code{parse_for_tables()}
#' @param parse_form_3_4s forwarded to \code{parse_for_tables()}
#' @param parse_asset_files forwarded to \code{parse_for_tables()}
#' @param parse_xbrl forwarded to \code{parse_for_tables()}
#' @param assign_to_environment forwarded to \code{parse_for_tables()}
#' @param return_message \code{TRUE} return a message after data import
#' @param nest_data \code{TRUE} return nested data frame
#' @return nested \code{tibble} or \code{tibble} if \code{nest_data = FALSE}
#' @references \href{http://sec.gov}{The Securities and Exchange Commission}
#' @import dplyr tidyr purrr stringr formattable readr lubridate XBRL curl jsonlite lazyeval
#' @importFrom jsonlite fromJSON
#' @export
#' @family SEC
#' @family filing search
#' @examples
#' \dontrun{
#' edgar_filing_streams(start_date = "2016-01-01",
#' end_date = Sys.Date(), only_most_recent_data = FALSE, index_type = 'master',
#' nest_data = TRUE,
#' return_message = TRUE)
#' }
edgar_filing_streams <-
  function(start_date = "2017-02-15",
           end_date = Sys.Date(),
           only_most_recent_data = FALSE,
           index_type = 'master',
           table_name_initial = "Filing Logs",
           parse_all_filings = FALSE,
           parse_complete_text_filings = FALSE,
           parse_form_d = FALSE,
           parse_13F = FALSE,
           parse_small_offerings =  FALSE,
           parse_form_3_4s =  FALSE,
           parse_asset_files = FALSE,
           parse_xbrl =  FALSE,
           assign_to_environment =  TRUE,
           nest_data = TRUE,
           return_message = TRUE) {

    start_date <- lubridate::ymd(start_date)
    end_date <- lubridate::ymd(end_date)

    # The most-recent mode only needs the current year's index listing.
    search_years <-
      if (only_most_recent_data) {
        lubridate::year(Sys.Date())
      } else {
        lubridate::year(start_date):lubridate::year(end_date)
      }

    df_urls <-
      .get_years_page_urls(years = search_years,
                           index_type = index_type,
                           return_message = return_message)

    # Either the last listed index file, or every file inside the date range.
    urls <-
      if (only_most_recent_data) {
        df_urls %>%
          slice(nrow(df_urls)) %>%
          .$urlSECIndex
      } else {
        df_urls %>%
          filter(dateIndex >= start_date) %>%
          filter(dateIndex <= end_date) %>%
          .$urlSECIndex
      }

    # A page that fails to parse contributes no rows.
    parse_index_filing_page_safe <-
      purrr::possibly(.parse_index_filing_page, tibble())

    all_data <-
      urls %>%
      future_map_dfr(function(x) {
        parse_index_filing_page_safe(url = x, return_message = return_message)
      })

    all_data <-
      all_data %>%
      left_join(df_urls %>% select(urlSECIndex, yearData, idQuarter),
                by = "urlSECIndex") %>%
      select(yearData, idQuarter, dateIndex, everything())

    # Format every column whose name contains "count" as a comma number.
    count_cols <-
      all_data %>%
      select(dplyr::matches("count")) %>%
      names()

    all_data <-
      all_data %>%
      mutate_at(count_cols,
                function(x) formattable::comma(x, digits = 0)) %>%
      select(-dplyr::matches("slugAccension"))

    if (return_message) {
      list(
        "Parsed ",
        all_data %>% nrow() %>% formattable::comma(digits = 0),
        " SEC filings from ",
        all_data$dateIndex %>% min(na.rm = TRUE),
        ' to ',
        all_data$dateIndex %>% max(na.rm = TRUE)
      ) %>%
        purrr::reduce(paste0) %>%
        cat(fill = TRUE)
    }

    # `parse_complete_text_filings` and `assign_to_environment` are part of
    # this function's signature and are forwarded here so that they are not
    # silently ignored.
    all_data %>%
      parse_for_tables(
        table_name_initial = table_name_initial,
        parse_all_filings = parse_all_filings,
        parse_complete_text_filings = parse_complete_text_filings,
        parse_form_d = parse_form_d,
        parse_13F = parse_13F,
        parse_small_offerings = parse_small_offerings,
        parse_form_3_4s = parse_form_3_4s,
        parse_asset_files = parse_asset_files,
        parse_xbrl = parse_xbrl,
        assign_to_environment = assign_to_environment,
        nest_data = nest_data,
        return_message = return_message
      )
  }




# CIK Search --------------------------------------------------------------


# Probe one paged EDGAR browse URL and report whether it is the last page.
#
# The page is requested with fixed browser-like headers and cookies. The
# `start=` offset is read back from `url`, and the page's `<input>` values
# are checked for a "NEXT 100" button.
#
# @param url A browse-edgar URL containing a `start=` query parameter.
# @param override If `TRUE`, a page that is not a valid end page is reported
#   with its own offset instead of being classified further.
# @return `NULL` (invisibly) when the page has no `<input>` elements;
#   otherwise one of:
#   * `tibble(isEnd = TRUE, countStart)` for a valid page with no
#     "NEXT 100" button;
#   * `tibble(countStart)` when `override` is `TRUE`;
#   * an empty `tibble()` when more pages follow;
#   * `tibble(isEnd, countPage)` with the previous page's offset otherwise.
.guess_page_ongoing <-
  function(url = "https://www.sec.gov/cgi-bin/browse-edgar?action=getcompany&CIK=1184765&type=&dateb=&owner=include&start=0&count=100",
           override = FALSE) {
    # NOTE(review): these are hard-coded cookie values captured from a
    # browser session — confirm they are still needed.
    cookies <- c(
      'ak_bmsc' = 'BA09B9B7E282820D33A847AB6A4F577C~000000000000000000000000000000~YAAQEIXYF9KjfYuAAQAAtA9NkA/Asr69IqJoGtc80iFGaJxeyP/fnqUjhwOkojONIdc3b8zucr8+he1KdbmavGbRp6Bg53ClwHiw0u8kKwNbaufzhSpP74TwvSUfZhrK4Xk1CBHIR1BGudopKt6ds20EpWSCAos3IxzOKIGoGqBOHSIatGivOOcRK/l79CSBT8c7hvz/dF/jvduX42v4cOr5ff1p973FSHh6Pag8I+do5iiJJ9gch1a241qP1XOoz68SyvRYr77owlIoLuSvHiVhJzwVAnu0xZQxGWTWxAjvRPO4oY+p9asvti0DnuqL4VBnBqZFFTuq1Kdj3alR/dtzSU0EhLP7ij8Q6yU/P1jL4hwxnvUWKxSXkNho9DYUrVmOMvk+tYIMvpdj2+yH2fvz6n1JMXt1ovIq27f0SA==',
      'bm_sv' = 'F8C2F590FCEE07B36615FEA14209764D~YAAQEIXYFw2mfYuAAQAAsmpNkA/mkbJEFnhI9pVm9obV/qxGm3cqtAprkBZr42oLuRJF1PuYuEl9UZP/1QFAo0Uzy+Y8qQri6/BCoEczGMA36G2JWqz2WnV44Jux4vUP8sCpnBCeTYRFkAJWS8tpuawox2wws0IanSoClmvxL9eeP0R88baDAhujCikpQhG1/4WH6zUQpVsmStrp3IMBtoOjU5BVjB2W/ql7/Elb9D2dwEqQheWrqQYUorOY~1'
    )

    headers <- c(
      `authority` = 'www.sec.gov',
      `accept` = 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9',
      `accept-language` = 'en-US,en;q=0.9',
      `cache-control` = 'no-cache',
      `pragma` = 'no-cache',
      `sec-fetch-dest` = 'document',
      `sec-fetch-mode` = 'navigate',
      `sec-fetch-site` = 'none',
      `sec-fetch-user` = '?1',
      `sec-gpc` = '1',
      `upgrade-insecure-requests` = '1',
      `user-agent` = 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/101.0.4951.54 Safari/537.36'
    )

    res <-
      httr::GET(url = url,
                httr::add_headers(.headers = headers),
                httr::set_cookies(.cookies = cookies))

    page <- xml2::read_html(res)

    # Offset of this page: the text between "start=" and the next "&".
    page_count <-
      url %>% str_split('start=') %>%
      flatten_chr() %>%
      .[[2]] %>%
      str_split('&') %>%
      flatten_chr() %>%
      .[[1]] %>%
      as.character() %>%
      readr::parse_number()

    items <-
      page %>%
      html_nodes('input') %>%
      html_attr('value') %>% str_to_upper() %>%
      unique()

    if (length(items) == 0) {
      return(invisible())
    }

    # The page is invalid only when a heading actually says so. Previously
    # the comparison result was discarded (`length() > 0`), so any <h1> on
    # the page marked it as invalid.
    heading_text <-
      page %>%
      html_nodes('h1') %>%
      html_text() %>%
      str_to_lower()
    no_page <-
      any(str_detect(heading_text, 'invalid parameter'), na.rm = TRUE)

    # Last page when no "NEXT 100" button is present.
    is_end <- !(sum(str_detect(items, "NEXT 100"), na.rm = TRUE) > 0)

    if (is_end && !no_page) {
      return(tibble(isEnd = TRUE, countStart = page_count))
    }
    if (override) {
      return(tibble(countStart = page_count))
    }
    if (!is_end) {
      return(tibble())
    }

    # End reached on an invalid page: report the previous page's offset.
    tibble(isEnd = is_end, countPage = page_count - 100)
  }

# Fetch and parse EDGAR company-search result pages.
#
# Every URL is queued with `curl_fetch_multi()` and fetched by
# `multi_run()`. Each successful response is parsed into a tibble and
# collected; failed requests contribute nothing.
#
# @param urls Character vector of company-search result page URLs.
# @param return_message If `TRUE`, prints "Parsing: <url>" per response.
# @return A tibble with one row per listed entity (`idCIK`, `nameEntity`,
#   `nameEntityLegal`, and any of `idSIC`, `nameIndustry`,
#   `codeLocationBusiness` that are not entirely missing on a page), or an
#   empty tibble when nothing was parsed.
.parse_search_page <-
  function(urls = "https://www.sec.gov/cgi-bin/browse-edgar?action=getcompany&company=Bank&owner=exclude&match=&start=500&count=100&hidefilings=0",
           return_message = TRUE) {
    # Parsed pages are appended here by the `success` callback.
    pages <- list()

    success <- function(res) {
      if (return_message) {
        list("Parsing: ", res$url, "\n") %>% purrr::reduce(paste0) %>% cat(fill = TRUE)
      }
      page <-
        res$content %>%
        read_html()

      cik <-
        page %>%
        html_nodes('td:nth-child(1) a') %>%
        html_text() %>%
        as.numeric()

      entities <-
        page %>%
        html_nodes('td:nth-child(2)') %>%
        html_text() %>%
        str_to_upper()

      locations <-
        page %>%
        html_nodes('td:nth-child(3)') %>%
        html_text() %>%
        str_to_title() %>%
        str_trim()

      locations[locations == ''] <-
        NA

      data <-
        tibble(
          idCIK = cik,
          nameEntityLegal = entities,
          codeLocationBusiness = locations
        ) %>%
        mutate(codeLocationBusiness = codeLocationBusiness %>% str_to_upper()) %>%
        separate(nameEntityLegal,
                 into = c('nameEntityLegal', 'sic'),
                 sep = 'SIC: ') %>%
        # Split only at the first "-" so industry names that contain
        # hyphens are kept whole instead of being truncated.
        separate(sic,
                 into = c('idSIC', 'nameIndustry'),
                 sep = '-',
                 extra = 'merge') %>%
        mutate(
          idSIC = idSIC %>% str_trim() %>% as.numeric(),
          nameIndustry = nameIndustry %>% str_trim()
        ) %>%
        suppressWarnings() %>%
        suppressMessages() %>%
        # Drop columns that are entirely NA on this page.
        select(which(colMeans(is.na(.)) < 1)) %>%
        mutate(nameEntity = nameEntityLegal %>% str_to_upper() %>% str_replace_all('\\.|\\,', '') %>% str_trim()) %>%
        select(idCIK, nameEntity, everything()) %>%
        separate(nameEntity,
                 sep = '\\ /',
                 into = c('nameEntity', 'idLocationEntity')) %>%
        mutate(
          nameEntity = nameEntity %>% gsub('/', '', .),
          idLocationEntity = idLocationEntity %>% str_replace_all('\\/', '') %>% str_trim()
        ) %>%
        suppressWarnings() %>%
        suppressMessages() %>%
        select(-dplyr::matches("idLocationEntity"))

      # The callback cannot return a value to the caller, so it appends to
      # the enclosing function's list.
      pages[[length(pages) + 1]] <<- data
    }

    failure <- function(msg) {
      tibble()
    }

    urls %>%
      walk(function(x) {
        curl_fetch_multi(url = x, success, failure)
      })
    multi_run()

    bind_rows(pages)
  }

# Build the paged EDGAR company-search URLs for a search term.
#
# Probes up to `pages_out` result pages (100 entities each) with
# `.guess_page_ongoing()` until one of them reports where the results end,
# then returns one URL per page up to that point.
#
# All probe state is kept in local variables; nothing is written to or
# removed from the global environment.
#
# @param search_term Company name fragment to search for.
# @param pages_out Number of 100-row pages to probe.
# @return A tibble with `nameSearch`, `urlCIKPageFiling` and `countPage`.
.parse_search_page_length <-
  function(search_term = "BREA", pages_out = 5) {
    # Encode reserved characters too, so a term such as "AT&T" cannot break
    # out of the `company=` query parameter.
    term <- URLencode(search_term, reserved = TRUE)

    build_urls <- function(starts) {
      paste0(
        'https://www.sec.gov/cgi-bin/browse-edgar?action=getcompany&company=',
        term,
        '&type=&dateb=&owner=include&start=',
        starts,
        '&count=100'
      )
    }

    probe_urls <- build_urls(seq(0, by = 100, length.out = pages_out))

    # Probe pages in order and stop at the first one that yields a result.
    df_end <- tibble()
    for (url in probe_urls) {
      df_end <- .guess_page_ongoing(url = url, override = FALSE)
      if (length(df_end) > 0) {
        break
      }
    }

    # Nothing conclusive: re-probe every page in override mode and keep the
    # last row returned.
    if (length(df_end) == 0) {
      df_end <-
        probe_urls %>%
        future_map_dfr(function(x) {
          .guess_page_ongoing(url = x, override = TRUE)
        })
      df_end <-
        df_end %>%
        slice(nrow(df_end))
    }

    if (df_end %>% ncol() == 0) {
      df_end <-
        tibble(countStart = 0)
    }

    # `.guess_page_ongoing()` reports the offset as `countPage` on its
    # fall-through path (possibly negative); normalise it the same way
    # `.get_most_recent_filing_urls()` does.
    if ('countPage' %in% names(df_end)) {
      df_end <-
        df_end %>%
        dplyr::rename(countStart = countPage)
    }

    if (df_end$countStart < 0) {
      df_end <-
        df_end %>%
        mutate(countStart = 0)
    }

    # NOTE(review): ceiling(countStart / 100) pages starting at 0 stops one
    # page short of `countStart` itself — confirm whether the page at
    # `start = countStart` should be included.
    page_total <- ceiling(df_end$countStart / 100)
    page_urls <- build_urls(seq(0, by = 100, length.out = page_total))

    tibble(nameSearch = search_term, urlCIKPageFiling = page_urls) %>%
      mutate(countPage = 1:n())
  }

# Collect the entities returned by an EDGAR company-name search.
#
# Resolves the result page URLs for `search_term`, parses every page with
# `.parse_search_page()` and tags the combined rows with the search term.
#
# search_term:    company name (fragment) to search for.
# return_message: if TRUE, prints how many entities were returned.
#
# Returns the combined parsed pages with `nameSearch` as the first column.
.entity_ciks <-
  function(search_term = "BREA", return_message = TRUE) {
    page_urls <-
      .parse_search_page_length(search_term = search_term)$urlCIKPageFiling

    parse_page <- function(page_url) {
      .parse_search_page(url = page_url, return_message = FALSE)
    }

    entities <- future_map_dfr(page_urls, parse_page)
    entities <- mutate(entities, nameSearch = search_term)
    entities <- select(entities, nameSearch, everything())

    if (return_message) {
      entity_count <- formattable::comma(nrow(entities), digits = 0)
      cat(
        paste0(
          "Returned ",
          entity_count,
          ' SEC registered entities for the term ',
          search_term
        ),
        fill = TRUE
      )
    }

    return(entities)
  }

#' SEC registered entity search
#'
#' Searches the EDGAR company browser for each supplied name and returns the
#' matching SEC registered entities. Searches that fail are skipped and
#' contribute no rows.
#'
#' @param search_names character vector of entity names (or name fragments) to search for
#' @param nest_data if \code{TRUE} nests the results by \code{nameSearch} into a \code{dataSearch} column
#' @param return_message if \code{TRUE} prints a summary message for each search term
#'
#' @return a \code{tibble} with one row per matching entity (nested by search
#'   term when \code{nest_data = TRUE}); columns that are entirely \code{NA}
#'   are dropped
#' @export
#' @import dplyr tidyr purrr stringr formattable readr lubridate XBRL curl jsonlite lazyeval
#' @importFrom jsonlite fromJSON
#' @examples
#' edgar_entities_cik( c("Rockwood", "BREA", 'EJF'))
edgar_entities_cik <-
  function(search_names,
           nest_data = FALSE,
           return_message = TRUE) {

    .sec_entity_safe <-
      purrr::possibly(.entity_ciks, tibble())

    all_data <-
      search_names %>%
      future_map_dfr(function(x) {
        .sec_entity_safe(search_term = x, return_message = return_message)
      }) %>%
      select(which(colMeans(is.na(.)) < 1))

    # Rebuild the cleaned entity name from the legal name so that the
    # location suffix (text after " /") can be split out below. The check is
    # made against the result table itself and against the column that is
    # actually read.
    if (hasName(all_data, "nameEntityLegal")) {
      all_data <-
        all_data %>%
        mutate(nameEntity = nameEntityLegal %>% str_to_upper() %>% str_replace_all('\\.|\\,', '') %>% str_trim())
    }

    # Nothing to clean up when no search produced an entity name
    # (e.g. every search failed or returned no rows).
    if (!hasName(all_data, "nameEntity")) {
      return(all_data)
    }

    all_data <-
      all_data %>%
      select(-dplyr::matches("idLocationEntity")) %>%
      separate(nameEntity,
               sep = '\\ /',
               into = c('nameEntity', 'idLocationEntity')) %>%
      mutate(idLocationEntity = idLocationEntity %>% str_replace_all('\\/', '') %>% str_trim()) %>%
      select(nameSearch, idCIK, nameEntity, everything())

    # Split a trailing "FORMERLY: <old name>" into its own column, then drop
    # any column that ended up entirely NA.
    all_data <-
      all_data %>%
      separate(nameEntity, sep = 'FORMERLY: ', c('nameEntity', 'nameEntityFormer')) %>%
      dplyr::select(which(colMeans(is.na(.)) < 1))

    if (nest_data) {
      all_data <-
        all_data %>%
        nest(-c(nameSearch), .key = dataSearch)
    }

    # Always return the result visibly, whether or not it was nested.
    all_data
  }


# sec_metadata  -----------------------------------------------------------



# Return the text of every node in `page` matched by the CSS selector
# `css_node`.
.extract_info <- function(page, css_node) {
  matched_nodes <- html_nodes(page, css = css_node)
  html_text(matched_nodes)
}

# Split a "CITY ... STATE ZIP" string into its components.
#
# The string is split on single spaces; the last token is taken as the zip
# code, the second-to-last as the state, and everything before that is
# re-joined (with single spaces) as the city. Tokens are selected by position,
# so a city word that happens to equal the state or zip token is kept.
#
# x: address line, e.g. "MENLO PARK CA 94025".
#
# Returns a one-row tibble with `cityCompany`, `stateCompany` and
# `zipcodeCompany`, or an empty tibble when `x` has two tokens or fewer.
.parse_city_state <-
  function(x = "MENLO PARK CA 94025") {
    parts <- x %>% str_split('\\ ') %>% flatten_chr()
    n_parts <- length(parts)

    if (n_parts <= 2) {
      return(tibble())
    }

    tibble(
      cityCompany = parts[seq_len(n_parts - 2)] %>% str_c(collapse = ' '),
      stateCompany = parts[n_parts - 1],
      zipcodeCompany = parts[n_parts]
    )
  }

# Build the EDGAR company-browser URL for a ticker (substituted into the
# `CIK` query parameter).
.generate_url <- function(ticker = "FB") {
  url_template <-
    "https://www.sec.gov/cgi-bin/browse-edgar?CIK={ticker}&owner=exclude&action=getcompany&Find=Search"
  glue::glue(url_template)
}

# Scrape the company header of an EDGAR company-browser page.
#
# Reads the page at `url` and extracts the company name, CIK, SIC code,
# state of incorporation, fiscal year end and the first mailer address block.
# Stops with "Invalid company symbol" when the page has no `.companyName`
# text.
#
# Returns a tibble with `nameCompany`, `slugCIK`, `idCIK`, `idSIC`,
# `stateIncorporated`, `monthDayFiscalYearEnd` plus the address columns
# produced by `.parse_city_state()` and `addressStreetCompany`.
.parse_company_info <-
  function(url = "https://www.sec.gov/cgi-bin/browse-edgar?CIK=FB&owner=exclude&action=getcompany&Find=Search") {
    page <- read_html(url)

    # Company name is the text before " CIK" in the header
    header_text <- .extract_info(page, css_node = '.companyName')
    name_parts <- flatten_chr(str_split(header_text, '\\ CIK'))

    if (length(name_parts) == 0) {
      stop("Invalid company symbol")
    }
    company_name <- name_parts[[1]]

    # CIK slug is the link text up to the first "("
    cik_pieces <-
      .extract_info(page, ".companyName a") %>%
      str_split("\\(") %>%
      flatten_chr() %>%
      str_trim()
    cik <- cik_pieces[[1]]

    sic_text <- as.character(.extract_info(page, ".identInfo acronym+ a"))
    SIC <- readr::parse_number(sic_text)

    street.address <-
      .extract_info(page, ".mailer:nth-child(1) .mailerAddress:nth-child(1)")

    city.state <-
      .extract_info(page, ".mailer:nth-child(1) .mailerAddress+ .mailerAddress") %>%
      str_trim()
    city.state <- sub("\\s+$", "", city.state)
    city.state <- gsub("\n", "", city.state)

    # Two trailing address lines: the first is a second street line
    if (length(city.state) == 2) {
      street.address <- paste(street.address, city.state[1])
      city.state <- city.state[2]
    }

    df_city_state <- .parse_city_state(city.state)
    df_city_state <- mutate(df_city_state, addressStreetCompany = street.address)
    df_city_state <- dplyr::select(df_city_state, addressStreetCompany, everything())

    company.details <- .extract_info(page, ".identInfo")

    # When the label is absent gsub() leaves the text untouched, so the
    # leading characters of the block are compared against "SIC:" / "SI".
    fiscal.year.end <-
      substr(gsub("^.*Fiscal Year End: ", "", company.details), 1, 4)
    if (fiscal.year.end == "SIC:") {
      fiscal.year.end <- NA
    }

    # Computed as in the original; not included in the returned tibble
    state <-
      substr(gsub("^.*State location: ", "", company.details), 1, 2)

    state.inc <-
      substr(gsub("^.*State of Inc.: ", "", company.details), 1, 2)
    if (state.inc == "SI") {
      state.inc <- NA
    }

    header <-
      tibble(
        nameCompany = company_name,
        slugCIK = cik,
        idCIK = readr::parse_number(as.character(cik)),
        idSIC = SIC,
        stateIncorporated = state.inc,
        monthDayFiscalYearEnd = fiscal.year.end
      )

    bind_cols(header, df_city_state)
  }


# Fetch and parse several EDGAR company pages through curl's multi interface.
#
# Each URL is scheduled with `curl_fetch_multi()`. On success the response's
# URL is parsed with `.parse_company_info()` (wrapped so that a page which
# cannot be parsed contributes no rows instead of aborting the batch); on
# failure a "Fail: <url> (<message>)" line is printed.
#
# urls:           character vector of company page URLs.
# return_message: accepted for interface compatibility; not used here.
#
# Returns a tibble with one row per successfully parsed page.
.parse_company_pages <-
  function(urls,
           return_message = TRUE) {
    parse_company_info_safe <-
      purrr::possibly(.parse_company_info, tibble())

    df <-
      tibble()

    # Callbacks run later inside multi_run(), so results are accumulated in
    # the enclosing function's `df` via `<<-`.
    success <- function(res) {
      data <-
        parse_company_info_safe(url = res$url)

      df <<-
        df %>%
        bind_rows(data)
    }

    urls %>%
      walk(function(x) {
        # The failure callback only receives the error message, so the URL is
        # captured from the enclosing iteration.
        failure <- function(msg) {
          cat(sprintf("Fail: %s (%s)\n", x, msg))
        }
        curl_fetch_multi(url = x, success, failure)
      })
    multi_run()

    df
  }

# Look up SEC company information for a single ticker.
#
# Builds the company-browser URL for `ticker`, parses it (a failure yields an
# empty tibble), prepends an `idTicker` column and upper-cases every character
# column.
#
# ticker:         ticker symbol.
# return_message: if TRUE, prints a progress line before fetching.
.sec_ticker_info <-
  function(ticker = "VNO",
           return_message = TRUE) {
    if (return_message) {
      cat(glue::glue("Acquiring company information for {ticker}"), fill = TRUE)
    }

    parse_pages_safely <-
      purrr::possibly(.parse_company_pages, tibble())

    company_url <- .generate_url(ticker)

    company <- parse_pages_safely(company_url)
    company <- mutate(company, idTicker = ticker)
    company <- dplyr::select(company, idTicker, everything())

    mutate_if(company, is.character, str_to_upper)
  }

#' Get Ticker SEC company information
#'
#' Looks up each ticker on the EDGAR company browser, optionally joins the
#' SIC code dictionary and passes the result through \code{munge_tbl()}.
#'
#' @param tickers character vector of ticker symbols
#' @param join_sic if \code{TRUE} joins \code{dictionary_sic_codes()} by \code{idSIC}
#' @param snake_names passed to \code{munge_tbl()}
#' @param unformat passed to \code{munge_tbl()}
#' @param convert_case passed to \code{munge_tbl()}
#' @param amount_digits passed to \code{munge_tbl()}
#' @param include_address passed to \code{munge_tbl()}
#' @param return_message if \code{true} return a message
#'
#' @return a \code{tibble}
#' @export
#' @import curl glue dplyr purrr stringr rvest xml2
#'
#' @examples
#' sec_tickers_info(tickers = c("BXP", "AVB", "AAPL"))
sec_tickers_info <-
  function(tickers = c("VNO", "NVDA", "FB"),
           join_sic = TRUE,
           snake_names = FALSE,
           unformat = FALSE,
           convert_case = TRUE,
           amount_digits = 2,
           include_address = TRUE,
           return_message = TRUE) {
    all_data <-
      tickers %>%
      future_map_dfr(function(x) {
        .sec_ticker_info(ticker = x, return_message = return_message)
      }) %>%
      dplyr::select(which(colMeans(is.na(.)) < 1))

    # The join key is only present when at least one lookup returned a SIC
    # code; skip the join otherwise instead of failing on a missing column.
    if (join_sic && hasName(all_data, "idSIC")) {
      all_data <-
        all_data %>%
        left_join(dictionary_sic_codes(), by = "idSIC")
    }

    # Keep the munged table: the formatting arguments only take effect if the
    # result of munge_tbl() is what gets returned.
    all_data <-
      all_data %>%
      munge_tbl(
        snake_names = snake_names,
        unformat = unformat,
        convert_case = convert_case,
        amount_digits = amount_digits,
        include_address = include_address
      )

    all_data
  }

# page_guess --------------------------------------------------------------
# Lookup table translating the field names found in EDGAR filing-list XML
# (`nameSEC`) to the column names used by this package (`nameActual`).
.sec_filer_name_page_df <-
  function() {
    sec_names <-
      c("dateFiled", "filingHREF", "formName", "type", "XBRLREF")

    actual_names <-
      c("dateFiling", "urlSECFilingDirectory", "nameForm", "idForm", "urlXBRL")

    tibble(nameSEC = sec_names, nameActual = actual_names)
  }

# Inspect one EDGAR result page and report whether it is the last page.
#
# The page counts as the end when none of its <input> values contains
# "NEXT 100".
#
# url:      result page URL; must contain a `count=` query parameter.
# override: if TRUE, a page that is not a plain "end" page still yields a
#           `countStart` row instead of an empty tibble.
#
# Returns one of:
#   * invisible NULL                     - the page has no <input> elements
#   * tibble(isEnd = TRUE, countStart)   - end page and no <h1> present
#   * tibble(countStart)                 - otherwise, when `override` is TRUE
#   * tibble()                           - not the end, `override` is FALSE
#   * tibble(isEnd, countPage)           - end page with an <h1>, no override
#
# NOTE(review): `page_count` is the number following `count=` in the URL, and
# `no_page` is TRUE whenever the page has any <h1> at all (the comparison with
# 'invalid parameter' only affects the values, not the length) - confirm both
# are intended.
.guess_page_ongoing <-
  function(url = "https://www.sec.gov/cgi-bin/browse-edgar?action=getcompany&CIK=1184765&type=&dateb=&owner=include&start=0&count=100",
           override = FALSE) {
    page <- read_html(url)

    # Text between "count=" and the next "&"
    after_count <- flatten_chr(str_split(url, 'count='))[[2]]
    count_text <- flatten_chr(str_split(after_count, '&'))[[1]]
    page_count <- readr::parse_number(as.character(count_text))

    input_values <-
      page %>%
      html_nodes('input') %>%
      html_attr('value') %>%
      str_to_upper() %>%
      unique()

    if (length(input_values) == 0) {
      return(invisible())
    }

    h1_matches <-
      str_to_lower(html_text(html_nodes(page, 'h1'))) == 'invalid parameter'
    no_page <- length(h1_matches) > 0

    has_next <-
      sum(str_detect(input_values, "NEXT 100"), na.rm = TRUE) > 0
    is_end <- !has_next

    if (is_end & (!no_page)) {
      return(tibble(isEnd = TRUE, countStart = page_count))
    }

    if (override) {
      return(tibble(countStart = page_count))
    }

    if (!is_end) {
      return(tibble())
    }

    tibble(isEnd = is_end, countPage = page_count - 100)
  }


# Filer Parsing -----------------------------------------------------------


# Build the EDGAR filing-list page URLs (XML output) for a CIK.
#
# Probes up to `pages_out` candidate pages (offsets 0, 100, 200, ...) with
# `.guess_page_ongoing()` and stops at the first page that yields a non-empty
# result. If no page does, every candidate is re-queried with
# `override = TRUE` and the last row is used. The `countStart` value of that
# result determines how many page URLs are generated.
#
# cik:       SEC central index key of the filer.
# pages_out: maximum number of candidate pages to probe.
#
# Returns a tibble with columns `idCIK`, `urlCIKPageFiling`, `countPage`.
.cik_filer_page_urls <-
  function(cik = 1184765, pages_out = 20) {
    build_urls <- function(starts, suffix = '') {
      paste0(
        'https://www.sec.gov/cgi-bin/browse-edgar?action=getcompany&CIK=',
        cik,
        '&type=&dateb=&owner=include&start=',
        starts,
        '&count=100',
        suffix
      )
    }

    urls <-
      build_urls(seq(0, by = 100, length.out = pages_out))

    # All probing state is local: nothing is written to (or read from) the
    # global environment, so objects in the user's workspace cannot interfere
    # and no scratch variables are left behind.
    df_end <-
      tibble()
    for (url in urls) {
      df_end <-
        .guess_page_ongoing(url = url, override = FALSE)
      if (length(df_end) > 0) {
        break
      }
    }

    still_none <-
      length(df_end) == 0
    if (still_none) {
      df_end <-
        urls %>%
        future_map_dfr(function(x) {
          .guess_page_ongoing(url = x, override = TRUE)
        })
      df_end <-
        df_end %>%
        slice(nrow(df_end))
    }

    length_actual_pages <-
      ceiling(df_end$countStart / 100)
    length_actual <-
      seq(0, by = 100, length.out = length_actual_pages)
    urls <-
      build_urls(length_actual, suffix = '&output=xml')

    df_filing_urls <-
      tibble(idCIK = cik, urlCIKPageFiling = urls) %>%
      mutate(countPage = seq_len(n()))

    return(df_filing_urls)
  }

# Parse one page of a filer's EDGAR filing list (XML output).
#
# Takes the second top-level node of the document, turns each of its child
# nodes into name/value rows, maps the SEC field names to package column names
# via `.sec_filer_name_page_df()`, reshapes to one row per filing and passes
# the result through `.resolve_form_columns()` and `find_target_filings()`.
#
# url:            filing-list page URL.
# return_message: if TRUE, prints "Parsed: <url>" when done.
.parse_cik_filer_page <-
  function(url = "https://www.sec.gov/cgi-bin/browse-edgar?action=getcompany&CIK=899689&type=&dateb=&owner=include&start=600&count=100&output=xml",
           return_message = TRUE) {
    page <- read_xml(url)

    xml_nodes <- xml_contents(page)[[2]]

    filing_names <- xml_name(xml_contents(xml_nodes))

    # Long-format name/value rows for the x-th filing node
    parse_filing <- function(x) {
      filing_fields <- xml_children(xml_contents(xml_nodes)[[x]])

      tibble(
        countPageFiling = x,
        nameSEC = xml_name(filing_fields),
        value = xml_text(filing_fields)
      )
    }

    df_page_items <-
      seq_along(filing_names) %>%
      future_map_dfr(parse_filing) %>%
      left_join(.sec_filer_name_page_df()) %>%
      suppressWarnings() %>%
      suppressMessages() %>%
      select(-nameSEC) %>%
      spread(nameActual, value) %>%
      .resolve_form_columns() %>%
      mutate(urlCIKPageFiling = url) %>%
      find_target_filings()

    if (return_message) {
      cat(paste0("Parsed: ", url), fill = TRUE)
    }

    return(df_page_items)
  }
# Lookup table translating the field names found in the company-info section
# of EDGAR filer XML (`nameSEC`) to the column names used by this package
# (`nameActual`).
#
# The mapping is written as explicit SEC-name = package-name pairs so that the
# two columns cannot drift out of alignment. The second street line of the
# business address maps to `addressStreet2Business` and that of the mailing
# address to `addressStreet2Mailing`.
#
# NOTE(review): `name`, `formerNames` and `formerNamename` all map to
# `nameEntity`; this is kept as is - confirm it is intended.
.df_general_name_df <-
  function() {
    name_map <- c(
      CIK = "idCIK",
      CIKHREF = "urlCIKFiling",
      Location = "locationEntity",
      SIC = "idSICEntity",
      SICDescription = "nameIndustry",
      SICHREF = "urlSICMembers",
      businessAddresscity = "cityAddressBusiness",
      businessAddressphoneNumber = "phoneAddressBusiness",
      businessAddressstate = "stateAddressBusiness",
      businessAddressstreet = "addressStreet1Business",
      businessAddresszipCode = "zipcodeBusiness",
      fiscalYearEnd = "periodFiscalYearEnd",
      mailingAddresscity = "cityAddressMailing",
      mailingAddressstate = "stateAddressMailing",
      mailingAddressstreet = "addressStreet1Mailing",
      mailingAddresszipCode = "zipcodeMailing",
      name = "nameEntity",
      stateOfIncorporation = "stateIncorporation",
      businessAddressstreet2 = "addressStreet2Business",
      mailingAddressstreet2 = "addressStreet2Mailing",
      formerNames = "nameEntity",
      businessAddress = "addressBusiness",
      formerNamedate = "dateFormerName",
      formerNamename = "nameEntity",
      mailingAddress = "addressMailing"
    )

    tibble(nameSEC = names(name_map), nameActual = unname(name_map))
  }

# Parse the company-information section of a filer's EDGAR XML page.
#
# Takes the first top-level node of the document and walks its children.
# Each child is flattened into rows of (countItem, nameSEC, value); nested
# children get a `nameSEC` made of parent name + child name. The SEC names are
# then mapped to package column names with `.df_general_name_df()` (an error
# is raised for any unmapped name), only the first value per mapped name is
# kept, and the result is reshaped to a single wide row.
#
# url: filer page URL with XML output.
#
# Returns a one-row tibble with `nameEntity` first, `nameEntityLegal` (the
# name as reported) and `idLocationEntity` (text after " /" in the name).
.parse_cik_filer_general_info <-
  function(url = "https://www.sec.gov/cgi-bin/browse-edgar?action=getcompany&CIK=1326801&type=&dateb=&owner=include&start=0&count=100&output=xml") {
    page <-
      url %>%
      read_xml()

    # First top-level node holds the company information
    xml_nodes <-
      page %>%
      xml_contents() %>%
      .[[1]]

    items <-
      xml_nodes %>%
      xml_children() %>%
      xml_name()

    df_names <-
      .df_general_name_df()

    df_general <-
      seq_along(items) %>%
      future_map_dfr(function(x){

        xml_node <-
          xml_nodes %>%
          xml_contents() %>%
          .[[x]]

        item <-
          items[[x]]
        # XPath for an explicit <value> child of this item
        xml_search <-
          list('//', item, '/value') %>%
          purrr::reduce(paste0)

        value <-
          xml_node %>%
          xml_find_all(xml_search) %>%
          xml_text()
        no_val <-
          value %>% length() == 0

        # Former names: two levels of nesting, flattened to parent+child names
        if (item == 'formerNames') {
          value_search <-
            list('//', item) %>% purrr::reduce(paste0)

          item_parent <-
            xml_node %>%
            xml_find_all(value_search) %>%
            xml_children() %>%
            xml_name()

          items <-
            xml_node %>%
            xml_find_all(value_search) %>%
            xml_children() %>%
            xml_children() %>%
            xml_name()

          values <-
            xml_node %>%
            xml_find_all(value_search) %>%
            xml_children() %>%
            xml_children() %>%
            xml_text()

          # NOTE(review): `item` is a single name here, so `seq_along(item)`
          # selects only the first parent name, which is then recycled.
          df <-
            tibble(
              countItem = x,
              itemParent = item_parent[seq_along(item)],
              nameSEC = items,
              value = values
            ) %>%
            unite(nameSEC, itemParent, nameSEC, sep = '')
          return(df)
        }

        # Entity name: `//name` can match several nodes; keep the last match
        if (item == 'name') {
          value_search <-
            list('//', item) %>% purrr::reduce(paste0)
          items <-
            xml_node %>%
            xml_find_all(value_search) %>%
            xml_name()

          items <-
            items[length(items)]

          values <-
            xml_node %>%
            xml_find_all(value_search) %>%
            xml_text()

          values <-
            values[length(values)]

          df <-
            tibble(
              countItem = x,
              nameSEC = items,
              value = values
            )
          return(df)
        }

        # No explicit <value> child: read the node itself or its children
        if (no_val) {
          value_search <-
            list('//', item) %>% purrr::reduce(paste0)
          has_children <-
            xml_node %>%
            xml_find_all(value_search) %>% xml_children() %>% xml_length() %>% length() > 1
          if (has_children) {
            has_more_children <-
              xml_node %>%
              xml_find_all(value_search) %>%
              xml_children() %>%
              xml_children() %>% xml_length() %>% length() > 1

            # Two levels of nesting: name rows as parent+grandchild
            if (has_more_children) {
              item_parent <-
                xml_node %>%
                xml_find_all(value_search) %>%
                xml_children() %>%
                xml_name()

              items <-
                xml_node %>%
                xml_find_all(value_search) %>%
                xml_children() %>%
                xml_children() %>%
                xml_name()

              values <-
                xml_node %>%
                xml_find_all(value_search) %>%
                xml_children() %>%
                xml_children() %>%
                xml_text()

              df <-
                tibble(
                  countItem = x,
                  itemParent = item_parent[seq_along(item)],
                  nameSEC = items,
                  value = values
                ) %>%
                unite(nameSEC, itemParent, nameSEC, sep = '')
              return(df)
            }

            # One level of nesting: name rows as item+child
            item_parent <-
              xml_node %>%
              xml_find_all(value_search) %>%
              xml_name()

            item <-
              xml_node %>%
              xml_find_all(value_search) %>%
              xml_children() %>%
              xml_name()

            values <-
              xml_node %>%
              xml_find_all(value_search) %>%
              xml_children() %>%
              xml_text()

            df <-
              tibble(
                countItem = x,
                itemParent = item_parent,
                nameSEC = item,
                value = values
              ) %>%
              unite(nameSEC, itemParent, nameSEC, sep = '')
            return(df)
          }

          # Leaf node: its own text is the value
          if (!has_children) {
            values <-
              xml_node %>%
              xml_find_all(value_search) %>%
              xml_text()
          }
          df <-
            tibble(
              countItem = x,
              nameSEC = item,
              value = values
            )
          return(df)
        }
        # Item with an explicit <value> child. The row uses the same
        # `countItem` column as every other branch, which the later
        # `select(-c(nameSEC, countItem))` relies on.
        nameSEC <-
          xml_node %>%
          xml_find_all(list('//', item) %>% purrr::reduce(paste0)) %>%
          xml_name()
        tibble(countItem = x, nameSEC, value = value)
      })

    df_general <-
      df_general %>%
      left_join(df_names) %>%
      suppressMessages()

    # Fail loudly on SEC field names that have no mapping yet
    missing_names <-
      df_general$nameSEC[!df_general$nameSEC %in% df_names$nameSEC] %>%
      length() >0

    if (missing_names) {
      missing_n <-
        df_general$nameSEC[!df_general$nameSEC %in% df_names$nameSEC]
      stop(list("Missing ", missing_n) %>%
             purrr::reduce(paste0))
    }

    # Several SEC names can map to the same column; keep the first occurrence
    df_general <-
      df_general %>%
      select(-c(nameSEC,countItem)) %>%
      group_by(nameActual) %>%
      mutate(idRow = 1:n()) %>%
      filter(idRow == min(idRow)) %>%
      ungroup() %>%
      suppressMessages()

    # Remember the order in which fields appeared, to restore it after spread()
    col_order <-
      df_general$nameActual

    df_general <-
      df_general %>%
      spread(nameActual, value) %>%
      select(one_of(col_order)) %>%
      dplyr::rename(nameEntityLegal = nameEntity) %>%
      mutate(nameEntity = nameEntityLegal %>% str_to_upper() %>% str_replace_all('\\.|\\,', '') %>% str_trim()) %>%
      separate(nameEntity,
               sep = '\\ /',
               into = c('nameEntity', 'idLocationEntity')) %>%
      mutate(
        idLocationEntity = idLocationEntity %>% str_replace_all('\\/', '') %>% str_trim()
      ) %>%
      select(nameEntity, everything()) %>%
      suppressWarnings() %>%
      suppressMessages()

    return(df_general)
  }


# Assemble every filing listed on EDGAR for one CIK.
#
# Resolves the filing-list page URLs, parses each page (pages that fail to
# parse contribute no rows), reads the entity's general information from the
# first page to attach `nameEntity`, numbers the filings chronologically in
# `countFilingEntity` and returns them newest first. When a `urlXBRL` column
# is present a logical `hasXBRL` column is added.
#
# cik: SEC central index key of the filer.
.cik_filer_filings <-
  function(cik = 899689) {
    df_urls <-
      .cik_filer_page_urls(cik = cik) %>%
      suppressWarnings() %>%
      suppressMessages()

    page_urls <- df_urls$urlCIKPageFiling

    parse_page_safely <-
      purrr::possibly(.parse_cik_filer_page, tibble())

    df_filings <-
      future_map_dfr(page_urls, function(x) {
        parse_page_safely(url = x)
      })
    df_filings <- mutate(df_filings, idCIK = cik)
    df_filings <- select(df_filings, idCIK, everything())

    # Entity-level details come from the first page only; empty strings
    # become NA and all-NA columns are dropped.
    df_general <-
      page_urls[[1]] %>%
      .parse_cik_filer_general_info() %>%
      mutate_all(funs(ifelse(. == '', NA, .))) %>%
      .resolve_form_columns() %>%
      select(which(colMeans(is.na(.)) < 1))

    entity_key <- select(df_general, idCIK, nameEntity)

    df_filings <-
      df_filings %>%
      left_join(entity_key) %>%
      suppressMessages()

    # Number filings oldest-to-newest, then present newest first
    df_filings <-
      df_filings %>%
      select(-countPageFiling) %>%
      arrange(dateFiling) %>%
      mutate(countFilingEntity = 1:n()) %>%
      arrange(desc(dateFiling)) %>%
      select(countFilingEntity, idCIK, nameEntity, everything())

    if (!'urlXBRL' %in% names(df_filings)) {
      return(df_filings)
    }

    mutate(df_filings, hasXBRL = !is.na(urlXBRL))
  }

# SIC Search --------------------------------------------------------------


# Build the EDGAR result page URLs (XML output) listing filers for a SIC code.
#
# Probes up to `pages_out` candidate pages (offsets 0, 100, 200, ...) with
# `.guess_page_ongoing()` and stops at the first page that yields a non-empty
# result; when none does, a zero count is assumed. The `countStart` value of
# that result determines how many page URLs are generated.
#
# sic:       SIC industry code.
# pages_out: maximum number of candidate pages to probe.
#
# Returns a tibble with columns `idSIC`, `urlSICPageFiling`, `countPage`.
.sic_filer_page_urls <-
  function(sic = 6798, pages_out = 20) {
    build_urls <- function(starts, suffix = '') {
      paste0(
        'https://www.sec.gov/cgi-bin/browse-edgar?action=getcompany&SIC=',
        sic,
        '&type=&dateb=&owner=include&start=',
        starts,
        '&count=100',
        suffix
      )
    }

    urls <-
      build_urls(seq(0, by = 100, length.out = pages_out))

    # All probing state is local: nothing is written to (or read from) the
    # global environment, so objects in the user's workspace cannot interfere
    # and no scratch variables are left behind.
    df_end <-
      tibble()
    for (url in urls) {
      df_end <-
        .guess_page_ongoing(url = url, override = FALSE)
      if (length(df_end) > 0) {
        break
      }
    }

    still_none <-
      length(df_end) == 0
    if (still_none) {
      df_end <-
        tibble(countStart = 0)
    }

    length_actual_pages <-
      ceiling(df_end$countStart / 100)
    length_actual <-
      seq(0, by = 100, length.out = length_actual_pages)
    urls <-
      build_urls(length_actual, suffix = '&output=xml')

    df_sic_urls <-
      tibble(idSIC = sic, urlSICPageFiling = urls) %>%
      mutate(countPage = seq_len(n()))

    return(df_sic_urls)
  }

# Collect the SEC registered entities listed under one SIC code.
#
# Resolves the result page URLs for `sic`, parses each page with
# `.parse_search_page()` (both steps fall back to an empty tibble on error)
# and tags the combined rows with the SIC code.
#
# sic:            SIC industry code.
# return_message: forwarded to the page parser; if TRUE a summary line with
#                 the number of entities is also printed.
#
# Returns the combined parsed pages with `idSIC` as the first column.
.sic_code_filer <-
  function(sic = 6798,
           return_message = TRUE) {
    page_urls_safely <-
      purrr::possibly(.sic_filer_page_urls, tibble())
    search_page_safely <-
      purrr::possibly(.parse_search_page, tibble())

    url_df <- page_urls_safely(sic = sic)

    all_data <-
      future_map_dfr(url_df$urlSICPageFiling, function(x) {
        search_page_safely(url = x, return_message = return_message)
      })
    all_data <- mutate(all_data, idSIC = sic)
    all_data <- select(all_data, idSIC, everything())

    if (return_message) {
      entity_count <- formattable::comma(nrow(all_data), digits = 0)
      cat(
        paste0(
          '\nReturned ',
          entity_count,
          ' SEC registered entities for SIC industry code ',
          sic,
          '\n'
        ),
        fill = TRUE
      )
    }

    return(all_data)
  }

#' SIC code companies
#'
#' Returns the SEC registered entities listed on EDGAR under each of the
#' supplied SIC industry codes. Codes whose lookup fails contribute no rows.
#'
#' @param sic_codes vector of SIC industry codes to search; must not be empty
#' @param merge_names if \code{TRUE} joins the SIC code dictionary (without
#'   \code{nameOfficeAD}) onto the results; the dictionary is stored as
#'   \code{dataSICCodes} in the global environment when no object of that
#'   name exists yet
#' @param return_message if \code{TRUE} prints progress and summary messages
#' @param nest_data currently not used by the function body
#'
#' @return a \code{tibble} of entities, with \code{idSIC} and
#'   \code{nameIndustry} first when \code{merge_names = TRUE}
#' @export
#' @import dplyr tidyr purrr stringr formattable readr lubridate XBRL curl jsonlite lazyeval
#' @importFrom jsonlite fromJSON
#' @examples
#' edgar_sic_filers(sic_codes = c(3949, 3690, 3711))
edgar_sic_filers <-
  function(sic_codes = NULL,
           merge_names = TRUE,
           return_message = TRUE,
           nest_data = FALSE) {
    if (length(sic_codes) == 0) {
      stop("Please enter SIC codes to search")
    }

    sic_code_filer_safe <-
      purrr::possibly(.sic_code_filer, tibble())

    all_data <-
      future_map_dfr(sic_codes, function(x) {
        sic_code_filer_safe(sic = x, return_message = return_message)
      })

    if (!merge_names) {
      return(all_data)
    }

    # Reuse the SIC dictionary if one is already available, otherwise fetch
    # it once and keep it in the global environment.
    dictionary_available <- exists('dataSICCodes')
    if (!dictionary_available) {
      assign(x = 'dataSICCodes', value = dictionary_sic_codes(),
             envir = .GlobalEnv)
    }

    sic_names <- select(dataSICCodes, -nameOfficeAD)

    all_data <-
      all_data %>%
      left_join(sic_names) %>%
      select(idSIC, nameIndustry, everything()) %>%
      suppressMessages()

    return(all_data)
  }

# SEC - Subsidiary --------------------------------------------------------

# Extract the numeric CIK from an EDGAR archive URL: the archive prefix is
# stripped and the first "/"-separated path segment is converted to a number.
.parse_sec_url_for_cik <-
  function(url) {
    path_after_prefix <-
      str_replace_all(url, "https://www.sec.gov/Archives/edgar/data/", '')

    path_parts <- flatten_chr(str_split(path_after_prefix, '\\/'))

    as.numeric(path_parts[[1]])
  }

.loc_df <-
  function() {
    tibble(
      nameLocation = c(
        "AFGHANISTAN",
        "ALAND ISLANDS",
        "ALBANIA",
        "ALGERIA",
        "AMERICAN SAMOA",
        "ANDORRA",
        "ANGOLA",
        "ANGUILLA",
        "ANTARCTICA",
        "ANTIGUA AND BARBUDA",
        "ARGENTINA",
        "ARMENIA",
        "ARUBA",
        "AUSTRALIA",
        "AUSTRIA",
        "AUSTRIA-HUNGARY",
        "AZERBAIJAN",
        "BADEN",
        "BAHAMAS",
        "BAHRAIN",
        "BANGLADESH",
        "BARBADOS",
        "BAVARIA",
        "BELARUS",
        "BELGIUM",
        "BELIZE",
        "BENIN",
        "BERMUDA",
        "BHUTAN",
        "BOLIVIA, PLURINATIONAL STATE OF",
        "BONAIRE, SINT EUSTATIUS AND SABA",
        "BOSNIA AND HERZEGOVINA",
        "BOTSWANA",
        "BOUVET ISLAND",
        "BRAZIL",
        "BRITISH INDIAN OCEAN TERRITORY",
        "BRUNEI DARUSSALAM",
        "BULGARIA",
        "BURKINA FASO",
        "BURUNDI",
        "CAMBODIA",
        "CAMEROON",
        "CANADA",
        "CABO VERDE",
        "CAYMAN ISLANDS",
        "CENTRAL AFRICAN REPUBLIC",
        "CHAD",
        "CHILE",
        "CHINA",
        "CHRISTMAS ISLAND",
        "COCOS (KEELING) ISLANDS",
        "COLOMBIA",
        "COMOROS",
        "CONGO, THE DEMOCRATIC REPUBLIC OF THE",
        "CONGO",
        "COOK ISLANDS",
        "COSTA RICA",
        "COTE D'IVOIRE",
        "CROATIA",
        "CUBA",
        "CURACAO",
        "CYPRUS",
        "CZECH REPUBLIC",
        "CZECHOSLOVAKIA",
        "DENMARK",
        "DJIBOUTI",
        "DOMINICA",
        "DOMINICAN REPUBLIC",
        "ECUADOR",
        "EGYPT",
        "EL SALVADOR",
        "EQUATORIAL GUINEA",
        "ERITREA",
        "ESTONIA",
        "ETHIOPIA",
        "FALKLAND ISLANDS (MALVINAS)",
        "FAROE ISLANDS",
        "FIJI",
        "FINLAND",
        "FRANCE",
        "FRENCH GUIANA",
        "FRENCH POLYNESIA",
        "FRENCH SOUTHERN TERRITORIES",
        "GABON",
        "GAMBIA",
        "GEORGIA",
        "GERMAN DEMOCRATIC REPUBLIC",
        "FEDERAL REPUBLIC OF GERMANY",
        "GERMANY",
        "GHANA",
        "GIBRALTAR",
        "GREECE",
        "GREENLAND",
        "GRENADA",
        "GUADELOUPE",
        "GUAM",
        "GUATEMALA",
        "GUERNSEY",
        "GUINEA",
        "GUINEA-BISSAU",
        "GUYANA",
        "HAITI",
        "HANOVER",
        "HEARD ISLAND AND MCDONALD ISLANDS",
        "HESSE ELECTORAL",
        "HESSE GRAND DUCAL",
        "HOLY SEE (VATICAN CITY STATE)",
        "HONDURAS",
        "HONG KONG",
        "HUNGARY",
        "ICELAND",
        "INDIA",
        "INDONESIA",
        "IRAN, ISLAMIC REPUBLIC OF",
        "IRAQ",
        "IRELAND",
        "ISLE OF MAN",
        "ISRAEL",
        "ITALY",
        "JAMAICA",
        "JAPAN",
        "JERSEY",
        "JORDAN",
        "KAZAKHSTAN",
        "KENYA",
        "KIRIBATI",
        "KOREA",
        "KOREA, DEMOCRATIC PEOPLE'S REPUBLIC OF",
        "KOREA, REPUBLIC OF",
        "KOSOVO",
        "KUWAIT",
        "KYRGYZSTAN",
        "LAO PEOPLE'S DEMOCRATIC REPUBLIC",
        "LATVIA",
        "LEBANON",
        "LESOTHO",
        "LIBERIA",
        "LIBYA",
        "LIECHTENSTEIN",
        "LITHUANIA",
        "LUXEMBOURG",
        "MACAO",
        "MACEDONIA, THE FORMER YUGOSLAV REPUBLIC OF",
        "MADAGASCAR",
        "MALAWI",
        "MALAYSIA",
        "MALDIVES",
        "MALI",
        "MALTA",
        "MARSHALL ISLANDS",
        "MARTINIQUE",
        "MAURITANIA",
        "MAURITIUS",
        "MAYOTTE",
        "MECKLENBURG SCHWERIN",
        "MEXICO",
        "MICRONESIA, FEDERATED STATES OF",
        "MODENA",
        "MOLDOVA, REPUBLIC OF",
        "MONACO",
        "MONGOLIA",
        "MONTENEGRO",
        "MONTSERRAT",
        "MOROCCO",
        "MOZAMBIQUE",
        "MYANMAR",
        "NAMIBIA",
        "NAURU",
        "NEPAL",
        "NETHERLANDS",
        "NETHERLANDS ANTILLES",
        "NEW CALEDONIA",
        "NEW ZEALAND",
        "NICARAGUA",
        "NIGER",
        "NIGERIA",
        "NIUE",
        "NORFOLK ISLAND",
        "NORTHERN MARIANA ISLANDS",
        "NORWAY",
        "OMAN",
        "PAKISTAN",
        "PALAU",
        "PALESTINE, STATE OF",
        "PANAMA",
        "PAPUA NEW GUINEA",
        "PARAGUAY",
        "PARMA",
        "PERU",
        "PHILIPPINES",
        "PITCAIRN",
        "POLAND",
        "PORTUGAL",
        "PUERTO RICO",
        "QATAR",
        "REPUBLIC OF VIETNAM",
        "REUNION",
        "ROMANIA",
        "RUSSIAN FEDERATION",
        "RWANDA",
        "SAINT BARTHELEMY",
        "SAINT HELENA, ASCENSION AND TRISTAN DA CUNHA",
        "SAINT KITTS AND NEVIS",
        "SAINT LUCIA",
        "SAINT MARTIN (FRENCH PART)",
        "SAINT PIERRE AND MIQUELON",
        "SAINT VINCENT AND THE GRENADINES",
        "SAMOA",
        "SAN MARINO",
        "SAO TOME AND PRINCIPE",
        "SAUDI ARABIA",
        "SAXONY",
        "SENEGAL",
        "SERBIA",
        "SEYCHELLES",
        "SIERRA LEONE",
        "SINGAPORE",
        "SINT MAARTEN (DUTCH PART)",
        "SLOVAKIA",
        "SLOVENIA",
        "SOLOMON ISLANDS",
        "SOMALIA",
        "SOUTH AFRICA",
        "SOUTH GEORGIA AND THE SOUTH SANDWICH ISLANDS",
        "SOUTH SUDAN",
        "SPAIN",
        "SRI LANKA",
        "SUDAN",
        "SURINAME",
        "SVALBARD AND JAN MAYEN",
        "SWAZILAND",
        "SWEDEN",
        "SWITZERLAND",
        "SYRIAN ARAB REPUBLIC",
        "TAIWAN, PROVINCE OF CHINA",
        "TAJIKISTAN",
        "TANZANIA, UNITED REPUBLIC OF",
        "THAILAND",
        "TIMOR-LESTE",
        "TOGO",
        "TOKELAU",
        "TONGA",
        "TRINIDAD AND TOBAGO",
        "TUNISIA",
        "TURKEY",
        "TURKMENISTAN",
        "TURKS AND CAICOS ISLANDS",
        "TUSCANY",
        "TUVALU",
        "TWO SICILIES",
        "UGANDA",
        "UKRAINE",
        "UNITED ARAB EMIRATES",
        "UNITED KINGDOM",
        "UNITED STATES",
        "UNITED STATES MINOR OUTLYING ISLANDS",
        "URUGUAY",
        "UZBEKISTAN",
        "VANUATU",
        "VENEZUELA, BOLIVARIAN REPUBLIC OF",
        "VIET NAM",
        "VIRGIN ISLANDS, BRITISH",
        "VIRGIN ISLANDS, U.S.",
        "WALLIS AND FUTUNA",
        "WESTERN SAHARA",
        "WUERTTEMBURG",
        "YEMEN",
        "YEMEN ARAB REPUBLIC",
        "YEMEN PEOPLE'S REPUBLIC",
        "YUGOSLAVIA",
        "ZAMBIA",
        "ZANZIBAR",
        "ZIMBABWE",
        "ALABAMA",
        "ALASKA",
        "ARIZONA",
        "ARKANSAS",
        "CALIFORNIA",
        "COLORADO",
        "CONNECTICUT",
        "DELAWARE",
        "FLORIDA",
        "GEORGIA",
        "HAWAII",
        "IDAHO",
        "ILLINOIS",
        "INDIANA",
        "IOWA",
        "KANSAS",
        "KENTUCKY",
        "LOUISIANA",
        "MAINE",
        "MARYLAND",
        "MASSACHUSETTS",
        "MICHIGAN",
        "MINNESOTA",
        "MISSISSIPPI",
        "MISSOURI",
        "MONTANA",
        "NEBRASKA",
        "NEVADA",
        "NEW HAMPSHIRE",
        "NEW JERSEY",
        "NEW MEXICO",
        "NEW YORK",
        "NORTH CAROLINA",
        "NORTH DAKOTA",
        "OHIO",
        "OKLAHOMA",
        "OREGON",
        "PENNSYLVANIA",
        "RHODE ISLAND",
        "SOUTH CAROLINA",
        "SOUTH DAKOTA",
        "TENNESSEE",
        "TEXAS",
        "UTAH",
        "VERMONT",
        "VIRGINIA",
        "WASHINGTON",
        "WEST VIRGINIA",
        "WISCONSIN",
        "WYOMING",
        "DISTRICT OF COLUMBIA",
        "ENGLAND",
        "BRITISH VIRGIN ISLANDS",
        "NETHERLAND ANTILLES",
        "RUSSIA",
        "SOUTH KOREA",
        'TAIWAN',
        "VENEZUELA",
        'CHANNEL ISLANDS'
      )
    )
  }

# Parse a subsidiary exhibit page whose names, jurisdictions and ownership
# percentages sit in separate cells of the same table.
#
# page : parsed HTML document queried with html_nodes().
#
# Returns a tibble with columns `nameSubsidiary` (upper-cased text of the
# `td div` nodes), `nameLocationSubsidiary` (upper-cased `td` texts that are
# found in `.loc_df()$nameLocation`) and `pctSubsidiaryOwned` (numeric `td`
# texts divided by 100). The three vectors are combined positionally, so
# tibble() raises an error when their lengths are incompatible.
.parse_page_sub_multi_item_html <-
  function(page) {
    # Reference list of known location names.
    locations <-
      .loc_df() %>%
      .$nameLocation

    # Characters stripped from every cell before trimming.
    # NOTE(review): \u0095 / \u0096 look like mis-decoded bullet / dash
    # characters -- confirm against a sample filing.
    junk_pattern <- '\u0095 |\u0096|\u0095\n'

    subsidiaries <-
      page %>%
      html_nodes('td div') %>%
      html_text() %>%
      str_replace_all(junk_pattern, '') %>%
      str_trim()

    subsidiaries <-
      subsidiaries[!subsidiaries == '']

    data_nodes <-
      page %>%
      html_nodes('td') %>%
      html_text() %>%
      str_replace_all(junk_pattern, '') %>%
      str_trim() %>%
      str_to_upper()

    data_nodes <-
      data_nodes[!data_nodes == '']

    location_items <-
      data_nodes[data_nodes %in% locations]

    # Keep the cells that parse as numbers, at most one per subsidiary.
    # The coercion is wrapped directly: in the previous form
    # `x / 100 %>% suppressWarnings()` the pipe bound tighter than `/`, so the
    # suppression applied to the constant 100 and the "NAs introduced by
    # coercion" warnings still escaped.
    pct_vals <-
      tibble(value = data_nodes) %>%
      # Drop cells holding a parenthesised single character such as "(1)".
      filter(!value %>% str_detect("\\([(1-9)]\\)")) %>%
      mutate(pctSubsidiaryOwned = suppressWarnings(as.numeric(value))) %>%
      filter(!pctSubsidiaryOwned %>% is.na()) %>%
      slice(seq_along(subsidiaries)) %>%
      .$pctSubsidiaryOwned

    pct_vals <-
      pct_vals / 100

    tibble(
      nameSubsidiary = subsidiaries,
      nameLocationSubsidiary = location_items,
      pctSubsidiaryOwned = pct_vals
    ) %>%
      mutate(nameSubsidiary = nameSubsidiary %>% str_to_upper())
  }

# Parse an HTML subsidiary exhibit that is laid out as a table.
#
# page      : parsed HTML document queried with html_nodes().
# numbers   : positions n tried with the CSS selector `td:nth-child(n)`.
# hit_terms : header / boilerplate terms, used as regular expressions; cells
#             matching any of them are discarded.
#
# Returns a tibble with a `nameSubsidiary` column plus, when the matching
# table columns can be identified, `locationOrganizationSubsidiary` and
# `pctSubsidiaryOwned`. Left-over positional columns (names matching "X") are
# dropped. Identifier columns (idCIK / urlSEC) are NOT added here; the caller
# is expected to append them.
.parse_page_subsidiary_table_html <-
  function(page,
           numbers = 1:10,
           hit_terms = c(
             "Organized",
             "STATE OR|STATE OF|JURISDICTION OF|JURISDICTION OF INCORPORATION OR ORGANIZATION|JURISDICTION|JURISDICTION OF INCORPORATION OR\nORGANIZATION",
             "NAME|ORGANIZED UNDER THE LAWS OF",
             'STATE OF ORGANIZATION',
             'STATE OR COUNTRY OF ORGANIZATION',
             'NAME OF SUBSIDIARY',
             'NAME',
             'ENTITY NAME',
             'the laws of',
             'Percentage of voting',
             'securities owned by',
             'immediate parent',
             'CERTAIN INTERMEDIARY SUBSIDIARIES',
             'Note:',
             'Organized',
             'Under the',
             'Laws of',
             'OWNED BY',
             'IMMEDIATE',
             'PARENT',
             "OWNS",
             "CERTAIN INTERMEDIARY SUBSIDIARIES",
             'PERCENTAGE',
             'OF VOTING',
             'SECURITIES'
           )) {
    # Bold text is treated as headings; prefer `b font` nodes when present.
    is_ib1 <-
      page %>%
      html_nodes('b font') %>%
      html_text() %>% length() > 0

    if (is_ib1) {
      # The pipe after stri_trans_general() was missing, which left
      # str_split() as a stand-alone call without a pattern and made this
      # branch fail.
      items_bold <-
        page %>%
        html_nodes('b font') %>%
        html_text() %>%
        str_to_upper() %>%
        str_replace_all('\n', ' ') %>%
        stringi::stri_trans_general("Latin-ASCII") %>%
        str_split('\\-') %>%
        flatten_chr() %>%
        str_trim()
    } else {
      items_bold <-
        page %>%
        html_nodes('b') %>%
        html_text() %>%
        str_to_upper() %>%
        str_replace_all('\n', ' ') %>%
        stringi::stri_trans_general("Latin-ASCII") %>%
        str_split('\\-') %>%
        flatten_chr() %>%
        str_trim() %>%
        unique()
    }

    # A heading that mentions a month name is parsed as the exhibit date.
    month_pattern <-
      month.name %>% str_to_upper() %>% paste(collapse = '|')

    date_rows <-
      items_bold %>% grep(month_pattern, .)

    has_date <-
      length(date_rows) > 0

    if (has_date) {
      date_data <-
        items_bold[date_rows] %>%
        lubridate::mdy()
    } else {
      date_data <-
        NA
    }

    # One alternation regex: supplied terms + bold headings (upper-cased),
    # followed by the markers "(a)" ... "(z)".
    hit_terms <-
      hit_terms %>%
      append(items_bold) %>%
      str_to_upper() %>%
      unique() %>%
      append(list('(', letters, ')') %>%
               purrr::invoke(paste0, .)) %>%
      paste0(collapse = '|')


    # The same terms as a plain vector, for exact matching with %in%.
    hit_terms_in <-
      hit_terms %>% str_split('\\|') %>%
      flatten_chr()

    locations <-
      .loc_df() %>%
      .$nameLocation

    # Long table: one row per cell, `item` = "X<n>" for the n-th child cell.
    all_data <-
      numbers %>%
      future_map_dfr(function(x) {
        css_selector <-
          paste0('td:nth-child(', x, ')')
        has_length <-
          page %>%
          html_nodes(css_selector) %>% length() > 0
        if (has_length) {
          item <-
            paste0("X" , x)

          value <-
            page %>%
            html_nodes(css_selector) %>%
            html_text() %>%
            str_trim()
          tibble(item, value)
        }
      }) %>%
      mutate(
        value = value %>% str_to_upper() %>% str_replace_all('\n  ', ' ') %>% str_replace_all('\u0096 ', '')
      ) %>%
      filter(!value == '')

    has_loc_key <-
      all_data %>%
      filter(value %in% locations) %>%
      nrow() > 0

    # Initialised up front: both were previously referenced further down even
    # when no (or more than one) location column had been found.
    loc_cols <- character(0)
    loc_col <- NULL

    if (has_loc_key) {
      loc_cols <-
        all_data %>%
        filter(value %in% locations) %>%
        .$item %>%
        unique()
      if (loc_cols %>% length == 1) {
        loc_col <-
          loc_cols[[1]]
      }
    }

    pct_items <-
      all_data %>%
      filter(value %>% str_detect("PERCENT")) %>%
      .$item %>% unique()

    has_pct <-
      length(pct_items) > 0

    if (has_pct) {
      pct_col <-
        pct_items
    } else {
      pct_col <-
        NA
    }

    # Percent header and locations share a column: fall back to the
    # multi-item parser.
    is_whack <-
      pct_col[[1]] %in% loc_cols

    if (is_whack) {
      all_data <-
        page %>%
        .parse_page_sub_multi_item_html() %>%
        mutate(dateSubsidiaryAsOf = date_data)

      return(all_data)
    }

    # Remove headings and boilerplate cells.
    all_data <-
      all_data %>%
      filter(!value %in% items_bold) %>%
      filter(!value %>% str_detect(paste0(items_bold %>% unique(), collapse = '|'))) %>%
      filter(!value %in% hit_terms_in) %>%
      filter(!value %>% str_detect(hit_terms))

    # Cells per column, one column per item (X1, X2, ...).
    count_df <-
      all_data %>% count(item, sort = TRUE) %>%
      arrange(item) %>%
      spread(item, n)

    # TRUE when the second column holds exactly one more cell than the first.
    off_one <-
      (count_df[, 2] %>% extract2(1)) - (count_df[, 1] %>% extract2(1)) == 1

    min_item <-
      count_df %>% gather(item, value) %>% filter(value == min(value)) %>% .$item

    # If the percent header sits in the sparsest column, use the third column
    # as the percent column instead.
    change_pct <-
      has_pct & (pct_col == min_item) %>% sum() > 0

    if (change_pct) {
      pct_col <-
        names(count_df)[[3]]
    }

    if (off_one) {
      # Number the cells of each column independently, then align by number.
      df <-
        all_data$item %>% unique() %>%
        future_map_dfr(function(x) {
          has_data <-
            all_data %>%
            filter(item == x) %>%
            filter(!value %>% is.na()) %>%
            filter(!value == '') %>%
            nrow()

          if (has_data) {
            all_data %>%
              filter(item == x) %>%
              filter(!value %>% is.na()) %>%
              filter(!value == '') %>%
              filter(!value %>% str_detect(hit_terms)) %>%
              mutate(idSubsidiary = 1:n())
          }
        }) %>%
        filter(!value %>% str_detect(hit_terms)) %>%
        spread(item, value)

      if (change_pct) {
        df <-
          df %>%
          select(-one_of(min_item))
      }
    }

    if (!off_one) {
      has_property <-
        items_bold %>% str_detect('PROPERTY') %>% sum() > 0
      if (has_property) {
        # Property-style exhibits: read each <table> and map its first row
        # onto known column names.
        tables <-
          page %>%
          html_table(fill = TRUE)
        df <-
          seq_along(tables) %>%
          future_map_dfr(function(x) {
            table_df <-
              tables[[x]] %>%
              data.frame(stringsAsFactors = FALSE) %>%
              as_tibble()

            column_df <-
              table_df %>% slice(1) %>%
              gather(column, value) %>%
              mutate(idColumn = 1:n()) %>%
              filter(!value %>% is.na()) %>%
              left_join(tibble(
                value = c(
                  "PROPERTY",
                  "ENTITIES",
                  "STATE OF FORMATION",
                  "DATE OF FORMATION",
                  " ",
                  'General Information:'
                ),
                nameItem = c(
                  'nameProperty',
                  'nameSubsidiary',
                  'locationOrganizationSubsidiary',
                  'dateSubsidiaryFormed',
                  'locationOrganizationSubsidiary',
                  'nameSubsidiary'
                )
              )) %>%
              suppressMessages()
            two_col <-
              column_df %>% nrow() == 2
            if (two_col) {
              column_df$nameItem[[2]] <-
                'locationOrganizationSubsidiary'
            }

            columns_keep <-
              column_df$idColumn

            table_df <-
              table_df %>%
              select(columns_keep) %>%
              slice(-1) %>%
              purrr::set_names(column_df$nameItem)

            table_df <-
              table_df %>%
              mutate_all(funs(. %>% str_trim() %>% str_to_upper())) %>%
              mutate(nameSubsidiary = ifelse(nameSubsidiary == '', NA, nameSubsidiary)) %>%
              filter(!nameSubsidiary %>% is.na())


            if (two_col) {
              # Two-column tables carry "<location> FORMED <date>" in one cell.
              table_df <-
                table_df %>%
                tidyr::separate(
                  locationOrganizationSubsidiary,
                  into = c(
                    'locationOrganizationSubsidiary',
                    'dateSubsidiaryFormed'
                  ),
                  sep = 'FORMED'
                ) %>%
                suppressWarnings() %>%
                mutate(locationOrganizationSubsidiary = locationOrganizationSubsidiary %>% str_replace_all('\\,', '')) %>%
                mutate_all(funs(. %>% str_replace('\n', '') %>% str_trim()))
            }


            if ('nameProperty' %in% names(table_df)) {
              table_df <-
                table_df %>%
                mutate(nameProperty = ifelse(nameProperty == '', NA, nameProperty)) %>%
                mutate_all(funs(. %>% str_replace('\n|\n  |\n  ', '') %>% str_trim())) %>%
                mutate_all(funs(. %>% str_replace('\n', '') %>% str_trim())) %>%
                mutate_all(funs(. %>% str_replace('  ', ' ') %>% str_trim())) %>%
                fill(nameProperty)

            }

            return(table_df)
          })

        if ('dateSubsidiaryFormed' %in% names(df)) {
          df <-
            df %>%
            mutate(dateSubsidiaryFormed = dateSubsidiaryFormed %>% lubridate::mdy())
        }

        # This step previously referenced `cik` and `url`, neither of which is
        # defined in this function, and called str_replace_all() without a
        # replacement, so the branch could not complete. The identifier
        # columns are left to the caller. `\\bA ` removes only a stand-alone
        # article, so names ending in "A" (e.g. "GEORGIA ") are not truncated.
        df <-
          df %>%
          select(nameSubsidiary, everything()) %>%
          mutate(
            locationOrganizationSubsidiary = locationOrganizationSubsidiary %>%
              str_replace_all(
                '\\bA |LIMITED LIABILITY COMPANY|CORPORATION|LIMITED PARTNERSHIP',
                ''
              ) %>%
              str_trim()
          )

        return(df)
      }
      if (!has_property) {
        df <-
          all_data %>%
          mutate(value = ifelse(value == '', NA, value)) %>%
          filter(!value %>% is.na()) %>%
          group_by(item) %>%
          mutate(idSubsidiary = 1:n()) %>%
          spread(item, value) %>%
          filter(!X1 == '') %>%
          mutate(idSubsidiary = 1:n()) %>%
          gather(item, value, -c(X1, idSubsidiary)) %>%
          ungroup() %>%
          filter(!value %>% str_detect(hit_terms)) %>%
          spread(item, value)
      }

    }

    # First column is the subsidiary name; drop anything from "(" onwards.
    df <-
      df %>%
      dplyr::rename(nameSubsidiary = X1) %>%
      tidyr::separate(nameSubsidiary,
                      sep = '\\(',
                      into = c('nameSubsidiary', 'remove')) %>%
      select(-dplyr::matches("remove")) %>%
      mutate(nameSubsidiary = nameSubsidiary %>% str_trim()) %>%
      suppressWarnings() %>%
      select(-dplyr::matches("idSubsidiary"))

    if (has_pct) {
      names(df)[names(df) %>% grep(pct_col, .)] <-
        'pctSubsidiaryOwned'

      df <-
        df %>%
        mutate_at(df %>% select(dplyr::matches('pct')) %>% names(),
                  funs(. %>% as.numeric() / 100)) %>%
        suppressWarnings()
    }

    # Rename only when exactly one location column was identified.
    if (has_loc_key && !is.null(loc_col)) {
      names(df)[names(df) %>% grep(loc_col, .)] <-
        'locationOrganizationSubsidiary'
    }

    df <-
      df %>%
      select(-dplyr::matches("X"))

    return(df)
  }

# Download an HTML subsidiary exhibit and parse it into a tibble.
#
# url            : address of the exhibit page.
# return_message : when TRUE, prints "Parsed: <url>" after a successful parse.
#
# The page layout decides which strategy is used:
#   1. no table cells, and more than a quarter of the `font` texts contain
#      "("  -> "NAME (LOCATION)" lines;
#   2. no table cells, and more than two `b font` headings -> headings are
#      treated as locations for the entries that follow them;
#   3. no `b` nodes at all -> plain positional table parsed in place;
#   4. otherwise -> .parse_page_subsidiary_table_html().
#
# Returns a tibble with `nameSubsidiary`, `idCIK`, `urlSEC` and, where they
# could be identified, `locationOrganizationSubsidiary` /
# `pctSubsidiaryOwned`.
.parse_sec_subsidiary_url_html <-
  function(url = "https://www.sec.gov/Archives/edgar/data/34088/000003408816000065/xomexhibit21.htm",
           return_message = TRUE) {
    # Progress message shared by every exit point.
    report_parsed <- function() {
      if (return_message) {
        cat(paste0("Parsed: ", url), fill = TRUE)
      }
    }

    cik <-
      url %>%
      .parse_sec_url_for_cik()

    page <-
      url %>%
      read_html()

    is_zero <-
      page %>%
      html_nodes(paste0('td:nth-child(', 1, ')')) %>%
      length() == 0
    locations <-
      .loc_df() %>%
      .$nameLocation

    if (is_zero) {
      # NOTE(review): the pattern here was an escaped blank replaced by a
      # blank, i.e. a no-op; normalising non-breaking spaces is presumably
      # what was intended -- confirm against the upstream source.
      data <-
        page %>%
        html_nodes('font') %>%
        html_text() %>%
        str_replace_all('\u00a0', ' ')

      data <-
        data[!data == '']


      is_parenth <-
        data %>% str_detect('\\(') %>% sum() / length(data) > .25

      if (is_parenth) {
        data <-
          data[data %>% str_detect('\\(')]

        # "NAME (LOCATION) ..." -> name / location; text after ")" is dropped.
        df <-
          tibble(data) %>%
          separate(
            data,
            sep = '\\(',
            into = c('nameSubsidiary', 'locationOrganizationSubsidiary')
          ) %>%
          separate(
            locationOrganizationSubsidiary,
            sep = '\\)',
            into = c('locationOrganizationSubsidiary', 'remove')
          ) %>%
          select(-remove) %>%
          mutate_all(funs(. %>% str_trim() %>% str_to_upper())) %>%
          mutate(idCIK = cik, urlSEC = url) %>%
          select(-dplyr::matches("idSubsidiary"))

        report_parsed()

        return(df)
      }

      is_nested <-
        page %>%
        html_nodes('b font') %>%
        html_text() %>% length() > 2

      if (is_nested) {
        locations_raw <-
          page %>%
          html_nodes('b font') %>%
          html_text() %>%
          str_replace_all('\\:', '') %>%
          str_to_upper()

        locations <-
          locations_raw[!locations_raw %>% str_detect('EXHIBIT|SUBSIDIARY|SUBSIDIARIES')]

        data <-
          data[data %>% nchar() > 3] %>% str_to_upper()

        df <-
          tibble(nameSubsidiary = data) %>%
          mutate(idRow = 1:n())

        # Row following each heading, tagged with that heading. Kept under a
        # distinct name so the `.loc_df()` helper is not shadowed.
        df_locations <-
          tibble(nameSubsidiary = locations) %>%
          inner_join(df %>% select(idRow, nameSubsidiary)) %>%
          mutate(idRow = idRow + 1) %>%
          select(locationOrganizationSubsidiary = nameSubsidiary, idRow) %>%
          suppressMessages()

        df <-
          df %>%
          filter(!nameSubsidiary %>% str_detect('SUBSIDIARY|SUBSIDIARIES')) %>%
          filter(!nameSubsidiary %>% str_detect(paste0(locations_raw, collapse = '|'))) %>%
          suppressWarnings()

        # Carry each heading down to the entries listed beneath it.
        df <-
          df %>%
          left_join(df_locations) %>%
          fill(locationOrganizationSubsidiary) %>%
          mutate(urlSEC = url, idCIK = cik) %>%
          select(idCIK,
                 nameSubsidiary,
                 locationOrganizationSubsidiary,
                 everything()) %>%
          select(-idRow) %>%
          suppressMessages() %>%
          select(-dplyr::matches("idSubsidiary"))

        report_parsed()

        return(df)
      }
    }

    is_font_table <-
      page %>%
      html_nodes('b') %>%
      html_text() %>% length() == 0

    if (is_font_table) {
      # Long table: one row per cell, `item` = "X<n>" for the n-th child cell.
      all_data <-
        1:10 %>%
        future_map_dfr(function(x) {
          css_selector <-
            paste0('td:nth-child(', x, ')')
          has_length <-
            page %>%
            html_nodes(css_selector) %>% length() > 0
          if (has_length) {
            item <-
              paste0("X" , x)

            value <-
              page %>%
              html_nodes(css_selector) %>%
              html_text() %>%
              str_trim()
            tibble(item, value)
          }
        }) %>%
        mutate(
          value = value %>% str_to_upper() %>% str_replace_all('\n  ', ' ') %>% str_replace_all('\u0096 ', '')
        ) %>%
        filter(!value == '')


      has_loc_key <-
        all_data %>%
        filter(value %in% locations) %>%
        nrow() > 0

      if (has_loc_key) {
        loc_col <-
          all_data %>%
          filter(value %in% locations) %>%
          .$item %>%
          unique()
      }

      header_terms <-
        c(
          "Organized",
          "STATE OR|STATE OF|JURISDICTION OF|JURISDICTION OF INCORPORATION OR ORGANIZATION|JURISDICTION|JURISDICTION OF INCORPORATION OR\nORGANIZATION",
          "NAME|ORGANIZED UNDER THE LAWS OF",
          'STATE OF ORGANIZATION',
          'STATE OR COUNTRY OF ORGANIZATION',
          'NAME OF SUBSIDIARY',
          'NAME',
          'ENTITY NAME',
          'the laws of',
          'Percentage of voting',
          'securities owned by',
          'immediate parent',
          'CERTAIN INTERMEDIARY SUBSIDIARIES',
          'PERCENT OWNED'
        )

      # Alternation regex built from the terms above. This used to read from
      # `hit_terms` itself, which does not exist at this point, so the whole
      # branch failed.
      hit_terms <-
        header_terms %>%
        str_to_upper() %>%
        paste0(collapse = '|')

      # The same terms as a plain vector, for exact matching with %in%.
      hit_terms_in <-
        hit_terms %>% str_split('\\|') %>%
        flatten_chr()

      # A percent column is one holding "100" or a PERCENT header.
      has_pct_col <-
        all_data %>%
        filter(value %in% "100") %>%
        nrow() > 0 |
        (all_data %>% filter(value %>% str_detect('PERCENT')) %>% nrow() > 0)

      if (has_pct_col) {
        pct_col <-
          all_data %>%
          filter((value %in% "100") |
                   (value %>% str_detect("PERCENT"))) %>%
          .$item %>%
          unique() %>%
          .[[1]]
      }

      all_data <-
        all_data %>%
        filter(!value %in% hit_terms_in) %>%
        filter(!value %>% str_detect(hit_terms)) %>%
        filter(!value == '') %>%
        mutate(valueNC = value %>% nchar()) %>%
        filter(!value %>% str_detect("PERCENT"))

      # Without a percent column, cells of three characters or fewer are
      # discarded.
      if (!has_pct_col) {
        all_data <-
          all_data %>%
          filter(valueNC > 3)
      }
      all_data <-
        all_data %>%
        select(-valueNC) %>%
        group_by(item) %>%
        mutate(idSubsidiary = 1:n()) %>%
        spread(item, value) %>%
        ungroup() %>%
        dplyr::rename(nameSubsidiary = X1)

      if (has_loc_key) {
        names(all_data)[names(all_data) %in% loc_col] <-
          'locationOrganizationSubsidiary'
      }

      if (has_pct_col) {
        names(all_data)[names(all_data) %in% pct_col] <-
          'pctSubsidiaryOwned'

        all_data <-
          all_data %>%
          mutate(pctSubsidiaryOwned = pctSubsidiaryOwned %>% as.numeric() / 100)
      }

      all_data <-
        all_data %>%
        mutate(idCIK = cik,
               dateSubsidiaryAsOf = NA,
               urlSEC = url) %>%
        select(-dplyr::matches("idSubsidiary|^X"))

      report_parsed()

      return(all_data)

    }

    # Default: table with bold headings.
    df <-
      page %>%
      .parse_page_subsidiary_table_html() %>%
      suppressWarnings()

    df <-
      df %>%
      filter(!nameSubsidiary == '') %>%
      mutate(idCIK = cik, urlSEC = url) %>%
      select(-dplyr::matches("idSubsidiary")) %>%
      select(idCIK, everything())

    report_parsed()

    return(df)

  }

# url = 'https://www.sec.gov/Archives/edgar/data/19617/000095012301002499/y46253ex21-1.txt'
# Download a plain-text subsidiary exhibit and parse it into a tibble.
#
# url            : address of the .txt exhibit.
# return_message : when TRUE, prints "Parsed: <url>" once parsing is done.
#
# Each remaining line is split into fields on ":" and on runs of three blanks.
# Lines with two fields give name + location, lines with three fields also
# give an ownership percentage (divided by 100). Lines with any other number
# of fields are skipped.
#
# Returns a tibble with `idCIK`, `nameSubsidiary`,
# `locationOrganizationSubsidiary`, optionally `pctSubsidiaryOwned`,
# `isChildSubsidiary` (name started with "-") and `urlSEC`.
.parse_sec_subsidiary_url_text <-
  function(url = "https://www.sec.gov/Archives/edgar/data/899689/000104746903007996/a2104897zex-21.txt",
           return_message = TRUE) {
    cik <-
      url %>%
      .parse_sec_url_for_cik()
    data <-
      url %>%
      read_lines()

    data <-
      data[!data == '']
    has_s <-
      data %>% str_detect("<S>") %>% sum() > 0

    if (has_s) {
      # Keep only what follows the first "<S>" line. A logical mask is used
      # because `(i + 1):length(data)` counts backwards when "<S>" is the
      # last line.
      first_s <-
        data %>% grep("<S>", .) %>% .[[1]]
      data <-
        data[seq_along(data) > first_s]
    }

    # Drop header, ruler and markup lines.
    data <-
      data[!data %>% str_detect("STATE OF|NAME OF|---|NAME OF SUBSIDIARY|ORGANIZED UNDER|THE LAWS OF|<")]

    data <-
      data[data %>% nchar() > 3]

    df <-
      seq_along(data) %>%
      future_map_dfr(function(x) {
        item <-
          data[[x]]

        items <-
          item %>%
          str_replace_all('\\   ', '\\:') %>%
          str_split('\\:') %>%
          flatten_chr() %>%
          str_trim() %>%
          str_to_upper()

        items <-
          items[!items == '']

        n_items <-
          length(items)

        # Only 2- and 3-field lines are understood. Previously a line with
        # zero or more than three fields left `table_data` undefined and
        # aborted the whole parse.
        if (!n_items %in% c(2, 3)) {
          return(tibble())
        }

        table_data <-
          tibble(
            idSubsidiary = x,
            nameSubsidiary = items[[1]],
            locationOrganizationSubsidiary = items[[2]]
          )

        if (n_items == 3) {
          table_data <-
            table_data %>%
            mutate(pctSubsidiaryOwned = items[[3]] %>% as.numeric() / 100)
        }

        # A leading "-" marks a child entry. Only that leading dash is
        # removed, so hyphens inside a company name are preserved.
        table_data %>%
          mutate(
            isChildSubsidiary = nameSubsidiary %>% substr(1, 1) == "-",
            nameSubsidiary = nameSubsidiary %>% str_replace('^\\-', '') %>% str_trim()
          )
      }) %>%
      mutate(idCIK = cik, urlSEC = url) %>%
      select(-dplyr::matches("idSubsidiary")) %>%
      select(idCIK,
             nameSubsidiary,
             locationOrganizationSubsidiary,
             everything()) %>%
      filter(!nameSubsidiary %in% c('NAME', 'ORGANIZED UNDER'))

    df <-
      df %>%
      filter(!nameSubsidiary == '')

    if (return_message) {
      cat(paste0("Parsed: ", url), fill = TRUE)
    }

    return(df)

  }

# Parse a subsidiary exhibit, choosing the parser from the URL.
#
# url            : address of the exhibit; "htm"/"html" selects the HTML
#                  parser, "txt" the plain-text parser.
# return_message : forwarded to the chosen parser (controls its progress
#                  message).
#
# Returns the parser's tibble. An empty tibble is returned when the parser
# fails (both are wrapped with purrr::possibly()) or when the URL matches
# neither pattern.
.parse_sec_subsidiary_url  <-
  function(url = "https://www.sec.gov/Archives/edgar/data/34088/000003408816000065/xomexhibit21.htm",
           return_message = TRUE)  {
    is_text <-
      url %>%
      str_detect("txt")

    is_html <-
      url %>%
      str_detect("html|htm")

    parse_sec_subsidiary_url_text_safe <-
      purrr::possibly(.parse_sec_subsidiary_url_text, tibble())

    parse_sec_subsidiary_url_html_safe <-
      purrr::possibly(.parse_sec_subsidiary_url_html, tibble())

    # HTML is checked first: when a URL matched both patterns the HTML result
    # used to overwrite the text result, so this keeps the same outcome
    # without fetching the document twice.
    if (is_html) {
      return(parse_sec_subsidiary_url_html_safe(url, return_message = return_message))
    }

    if (is_text) {
      return(parse_sec_subsidiary_url_text_safe(url, return_message = return_message))
    }

    # Neither pattern matched. `data` was never assigned in this case, so the
    # final expression resolved to an unrelated object instead of a result.
    tibble()
  }
# abresler/fundManageR documentation built on April 1, 2024, 5:46 p.m.