R/read_funds_txt.R

Defines functions read_funds_txt

Documented in read_funds_txt

#' Read a library funds export into a long data frame
#'
#' Reads a txt file with library funds information mined from an OPAC and
#' reshapes it into one row per field occurrence. Records are expected to be
#' separated by a blank line, with one field per line: a three-character field
#' tag, one separator character, then the field contents. Empty lines are
#' ignored and lines whose tag is \code{"// "} are dropped.
#'
#' @param file A string: path of the txt file, passed to
#'   \code{readtext::readtext()}.
#' @param encoding A string: encoding of the file, passed to
#'   \code{readtext::readtext()}.
#' @return A data frame grouped by \code{id} and \code{field}, with columns:
#'   \describe{
#'     \item{id}{contents of the record's \code{001} line; when a record has
#'       no line tagged \code{001}, the contents of its third line}
#'     \item{field}{field tag: the first three characters of the line}
#'     \item{field_number}{occurrence number of a repeating field within its
#'       record, starting at 1}
#'     \item{var}{contents of the field: the line from its fifth character on}
#'   }
#' @examples
#' \dontrun{
#' read_funds_txt(file = "books.txt", encoding = "UTF-8")
#' }
read_funds_txt <- function(file, encoding = "UTF-8") {
  raw <- readtext::readtext(file, encoding = encoding)$text
  # Normalise Windows line endings so the blank-line record separator is still
  # recognised if the reader left "\r\n" in the text.
  raw <- stringr::str_replace_all(raw, "\r\n", "\n")

  # One element per record, each a character vector of its non-empty lines.
  # Empty lines (trailing newline, extra blank line between records) carry no
  # field, so they are discarded instead of becoming rows with an empty tag.
  records <- stringr::str_split(raw, "\n\n")[[1]]
  records <- lapply(
    stringr::str_split(records, "\n"),
    function(x) x[nzchar(x)]
  )
  records <- records[lengths(records) > 0L]

  # The record id is the contents of the line tagged "001". Records without
  # such a line fall back to their third line, the position this function has
  # always read the id from.
  record_ids <- vapply(
    records,
    function(x) {
      id_line <- x[stringr::str_sub(x, 1, 3) == "001"][1]
      if (is.na(id_line)) {
        id_line <- x[3]
      }
      stringr::str_sub(id_line, 5, -1)
    },
    character(1)
  )

  # as.character() keeps `content` a character column when there are no
  # records at all (unlist() of an empty list is NULL).
  fund_df <- data.frame(
    id = rep(record_ids, lengths(records)),
    content = as.character(unlist(records)),
    stringsAsFactors = FALSE
  )
  fund_df <- dplyr::mutate(
    fund_df,
    field = stringr::str_sub(content, 1, 3),
    var = stringr::str_sub(content, 5, -1)
  )
  fund_df <- dplyr::filter(fund_df, field != "// ")

  # Number repeated fields within each record; the grouping is kept on the
  # returned object.
  fund_df <- dplyr::group_by(fund_df, id, field)
  fund_df <- dplyr::mutate(fund_df, field_number = dplyr::row_number())
  dplyr::select(fund_df, id, field, field_number, var)
}
Volodin-DD/rusmarcFunds documentation built on Oct. 31, 2019, 1:11 a.m.