# tests/testthat/test-parsers.R

# Fixed timestamp passed to the parse_*() functions in place of the real clock
# (presumably what fills the Creation_date column, keeping output reproducible).
# NOTE(review): the empty tzone makes this instant (17:59:14 UTC) print in the
# session's local time zone -- confirm the recorded snapshots do not depend on
# the time zone of the machine running the tests.
fake_now <- structure(
  1642010354.70704,
  class = c("POSIXct", "POSIXt"),
  tzone = ""
)

# Relative path of the folder holding the sample export files read below.
target_folder <- file.path("Records", "SessionParse", "Query1")

test_that("EMBASE files are parsed correctly", {
  # Snapshot test: the str() summary of the parsed Embase export is compared
  # against the stored snapshot, which is the source of truth.
  # The expectation formerly inlined here described a data.frame of 19 obs. of
  # 15 variables: Order, ID, Title, Abstract, DOI, URL, Authors, Journal,
  # Author_keywords, Keywords, Article_type, Published, Source, Source_type and
  # Creation_date.
  raw_records <- import_data(file.path(target_folder, "Embase1.csv"))
  parsed <- parse_embase(raw_records, fake_now)

  # Capture the printed structure as a single string for the snapshot.
  output <- capture_output(str(parsed))

  expect_snapshot_value(output, style = "deparse")
})

test_that("SCOPUS files are parsed correctly", {
  # Snapshot test: the str() summary of the parsed Scopus export is compared
  # against the stored snapshot, which is the source of truth.
  # The expectation formerly inlined here described a data.frame of 19 obs. of
  # 16 variables: Order, ID, Title, Abstract, DOI, URL, Authors, Journal,
  # Author_keywords, Keywords, Article_type, N_citations, Published, Source,
  # Source_type and Creation_date.
  raw_records <- import_data(file.path(target_folder, "Scopus1.csv"))
  parsed <- parse_scopus(raw_records, fake_now)

  # Capture the printed structure as a single string for the snapshot.
  output <- capture_output(str(parsed))

  expect_snapshot_value(output, style = "deparse")
})

test_that("PUBMED files are parsed correctly", {
  # Snapshot test: the str() summary of the parsed PubMed export is compared
  # against the stored snapshot, which is the source of truth.
  # The expectation formerly inlined here described a data.frame of 20 obs. of
  # 16 variables: Order, ID, Title, Abstract, DOI, Authors, URL, Journal,
  # Journal_short, Article_type, Mesh, Author_keywords, Published, Source,
  # Source_type and Creation_date.
  # Unlike the CSV-based tests, the .nbib file is read as one raw string.
  raw_text <- readr::read_file(file.path(target_folder, "Pubmed.nbib"))
  parsed <- parse_pubmed(raw_text, fake_now)

  # Capture the printed structure as a single string for the snapshot.
  output <- capture_output(str(parsed))

  expect_snapshot_value(output, style = "deparse")
})

test_that("read_bib_files() produces the expected output", {
  # Every file found recursively under Records/SessionParse, with full paths.
  record_files <- list.files(
    file.path("Records", "SessionParse"),
    full.names = TRUE,
    recursive = TRUE
  )

  # Strip the Creation_date element from one result before snapshotting
  # (presumably because it changes from run to run -- confirm).
  drop_creation_date <- function(records) {
    records$Creation_date <- NULL

    records
  }

  output <- lapply(read_bib_files(record_files), drop_creation_date)

  expect_snapshot_value(output, style = "serialize")
})
# bakaburg1/BaySREn documentation built on March 30, 2022, 12:16 a.m.