# data-raw/create_bioentity_df.R

library(datapasta)
library(tibble)
library(dplyr)
library(tidyr)
library(stringr)

# Raw endpoint text that the pipeline below splits into one row per line.
# NOTE(review): the call passes no input vector, so datapasta presumably reads
# the system clipboard and returns the text of a vertically formatted vector
# definition -- i.e. the scraped endpoint list must be copied to the clipboard
# before this line runs. Confirm against the datapasta documentation.
# NOTE(review): judging by the variable name, the text is copied from the API's
# Swagger page; this step is therefore not reproducible without that manual
# copy.
scraped_from_swagger <- datapasta::vector_construct_vertical()

# Build `df_bioentity`: one row per scraped "bioentity" endpoint, with the
# path split into its segments.
#
# Steps:
#   1. Split the scraped text into one row per line and keep only the lines
#      that mention "bioentity".
#   2. Cut each line on "/" into up to seven pieces (a..g). Pieces a and b
#      (the text before the first "/" and the second piece) and the seventh
#      piece are discarded; the rest become
#        refid   = 3rd piece
#        id      = 4th piece
#        queryid = 5th piece
#        ext     = 6th piece
#   3. Missing `id`, `queryid` and `ext` pieces are labelled "not_required".
#   4. Drop rows whose `ext` mentions "taxid" or "taxon" and rows whose
#      `refid` mentions "goterm".
#   5. Strip the leftover `",` from every column and the `{` / `}` around the
#      placeholders in `id` and `refid`.
#
# Original nº of rows: 83
# Final nº of rows: 79
df_bioentity <- enframe(scraped_from_swagger, name = NULL) %>%
  separate_rows(value, sep = "\n") %>%
  filter(str_detect(value, "bioentity")) %>%
  # Short paths are expected (they are relabelled just below), so pad them
  # with NA silently. `extra` keeps its default so that a path with more than
  # seven pieces is still reported.
  separate(value, sep = "/", into = letters[1:7], fill = "right") %>%
  # Keep and name the four segments of interest in a single step.
  select(refid = c, id = d, queryid = e, ext = f) %>%
  replace_na(list(
    id = "not_required",
    queryid = "not_required",
    ext = "not_required"
  )) %>%
  filter(
    !str_detect(ext, "taxid"),
    !str_detect(ext, "taxon"),
    !str_detect(refid, "goterm")
  ) %>%
  # str_remove() drops only the first match, exactly as the step-by-step
  # version did. across() requires dplyr >= 1.0.0.
  mutate(
    across(c(refid, id, queryid, ext), ~ str_remove(.x, "\",")),
    across(c(refid, id), ~ str_remove(str_remove(.x, "\\{"), "\\}"))
  )

# Distinct values of the `refid` and `queryid` columns, in order of first
# appearance, extracted as plain vectors.
vector_refid <- unique(df_bioentity[["refid"]])
vector_queryid <- unique(df_bioentity[["queryid"]])

# Write the table and both vectors into a single .rda file.
save(
  list = c("df_bioentity", "vector_refid", "vector_queryid"),
  file = "R/sysdata.rda"
)
# frequena/rbiolink documentation built on May 16, 2020, 10:20 p.m.