# Test the output of createTextMatrixFromPDF using the dummy PDFs in tests/testthat
# testthat must be attached before any test_that() call below. The earlier
# `if (!require(testthat)) install.packages("testthat")` guard installed the
# package without ever attaching it, and installing packages from inside a
# test file is a side effect a test run should not have. library() fails
# immediately with a clear error when the package is missing.
library(testthat)

# Build the fixture once; every test below reads from this shared object.
# getwd() is passed as the PDF directory, so the working directory must be the
# folder containing dummy_article.pdf and dummy_article2.pdf.
metaMatrix <- createTextMatrixFromPDF(getwd())
test_that("check metaMatrix structure", {
  # Column names metaMatrix is expected to expose, in this exact order.
  expected_cols <- c(
    "Title",
    "Year",
    "Month",
    "Day",
    "Authors",
    "Journal",
    "Volume",
    "Issue",
    "Pages",
    "CitedBy",
    "CitationPerYear",
    "DOI",
    "Scopus-ID",
    "Publisher",
    "Affiliation",
    "Affiliation-City",
    "Affiliation-Country",
    "FileName",
    "Abstract",
    "FullText",
    "ID"
  )

  # expect_equal() takes the observed value first and the reference value
  # second, so that failure reports label the two sides correctly.
  expect_equal(colnames(metaMatrix), expected_cols)

  # Shape check: 2 rows and 21 columns.
  expect_equal(nrow(metaMatrix), 2)
  expect_equal(ncol(metaMatrix), 21)
})
test_that("check DOI info", {
  # DOI values expected in the "DOI" column, in row order.
  expected_doi <- c("10.1177/ToBeAssigned", "10.1364/ao.XX.XXXXXX")

  # Observed value first, reference second (testthat argument order).
  expect_equal(metaMatrix[, "DOI"], expected_doi)
})
test_that("check FileName info", {
  # The "FileName" column is expected to hold each PDF's path under getwd().
  # file.path() is vectorised, so one call builds both paths.
  expected_files <- file.path(
    getwd(),
    c("dummy_article.pdf", "dummy_article2.pdf")
  )

  # Observed value first, reference second (testthat argument order).
  expect_equal(metaMatrix[, "FileName"], expected_files)
})
test_that("check ID info", {
  # IDs are compared as character strings "1" and "2", in row order.
  expected_ids <- c("1", "2")

  # Observed value first, reference second (testthat argument order).
  expect_equal(metaMatrix[, "ID"], expected_ids)
})
test_that("check full text info", {
# Compares the "FullText" value of rows 1 and 2 of metaMatrix against the
# reference strings text1 and text2 defined below.
# The reference literals contain both \n escapes and raw line breaks; all
# \r and \n characters are stripped from both the reference and the observed
# text before comparing, so line-break placement does not affect the result.

# Reference text for row 1.
text1 <-
" Journal Title\n XX(X):1–1\nLorem Ipsum c The Author(s) 2020\n Reprints and permission:\n sagepub.co.uk/journalsPermissions.nav\n DOI: 10.1177/ToBeAssigned\n www.sagepub.com/\n SAGE\nJia Yan Ng\nAbstract\nLorem ipsum dolor sit amet, consectetur adipiscing elit.\nKeywords\nSustainability, SciCloud, Publications\nIntroduction Table 1. Pellentesque pellentesque pharetra faucibus.\n Cras quis sapien gravida\nSed magna nisi, congue vel iaculis vulputate, auctor quis nisi.\nSed pulvinar ultricies nunc, at posuere mi facilisis at. In hac imperdiet 324 tempor pretium tortor eget\nhabitasse platea dictumst. Phasellus eros metus, tristique sed dapibus 326 iaculis phasellus non tristique odio\nullamcorper ac, gravida eget enim. Integer sed fermentum Quisque 346 Nulla viverra eleifend nulla viverra\n PCfour 572 euismod augue imperdiet viverra\nodio. Phasellus vitae tellus id massa blandit sollicitudin eu accumsan 235 lectus justo fringilla suscipit et\nsit amet elit. (Gotzian (2019) and Beeck (2020)). pulvinar 457 turpis interdum consectetur erat\n blandit 542 sollicitudin faucibus ac nibh vitae\nLiterature Review\n Proin ipsum ipsum\nAliquam pretium nisi lectus, eget sodales odio aliquam eu.\n Aenean Quisque nec leo condimentum.\nNunc pellentesque neque imperdiet quam iaculis mollis. efficitur ltricies nulla quis, consequat arcu.\nMaecenas sit amet est ut nulla placerat tempus suscipit in aliquam Curabitur, eget consequat tellus.\nfelis. Donec id commodo mauris, at luctus massa. Etiam\nblandit bibendum est, in volutpat metus condimentum a. In\nvel orci sit amet eros semper efficitur. Duis sollicitudin auctor References\nmolestie.\n Gotzian L (2019) A Guide to SciCloud, 4th edn. Addison-Wesley.\n (i) Praesent fringilla consequat auctor. 
Nullam elemen- Beeck J.J (2020) LATEX: a Tutorial to error handling, 2nd edn.\n tum, nunc vitae bibendum ultricies, metus leo ornare Addison-Wesley.\n dui, vel cursus est tortor a velit.\n (ii) Integer tincidunt metus mi, vel pellentesque nulla\n posuere tristique\nDiscussion\nMorbi in posuere est. Phasellus sit amet posuere justo. Ut\nelementum, dolor sed aliquam mollis, lacus est hendrerit\nligula, eu finibus ligula mauris vel odio. Vivamus viverra\ntempus justo eu porta. Aenean sit amet enim faucibus sapien\nfeugiat gravida. Aliquam ac urna sed nulla lacinia egestas et\nvestibulum enim. Sed tincidunt dui cursus venenatis ornare.\nResult\nNam consectetur quis ipsum sit amet consequat. Mauris\ndiam elit, fringilla sit amet pretium sed, tincidunt quis nibh.\nSuspendisse ac malesuada dui. Quisque interdum lacus vitae\nnunc lobortis, in pretium quam euismod. Donec tortor orci,\nrutrum dignissim metus ac, aliquet porta justo. Nullam porta, Corresponding author:\nlibero ut gravida gravida, lectus mi volutpat mi, et ornare Jia Yan Ng, Leuphana Univeristy\njusto erat mattis erat. Email: jia.y.ng@stud.leuphana.de\nPrepared using sagej.cls [Version: 2017/01/17 v1.20]\n"
# Reference text for row 2.
text2 <-
" Letter Optica 1\nLegacy LATEX template for preparing an article for\nsubmission to OSA journals Applied Optics, Advances\nin Optics and Photonics, JOSA A, JOSA B, Optics\nLetters, Optica, and Photonics Research\nAUTHOR O NE1,2,3 , AUTHOR T WO2,* , AND AUTHOR T HREE1\n1 Publications Department, The Optical Society (OSA), 2010 Massachusetts Avenue NW, Washington D.C., 20036\n2 School of Science, University of Technology, 2000 J St. NW, Washington DC, 20036\n3 School of Optics, University of Technology, 2000 J St. NW, Washington DC, 20036\n* Corresponding author: email@my-email.com\nCompiled August 4, 2020\nAbstract: This legacy template can be used to prepare appear in the final article. Do not include a separate list of figure\na research article for submission to OSA’s journals Ap- captions and table titles.\nplied Optics, Advances in Optics and Photonics, JOSA Figures and Tables should be labelled and referenced in the\nA, JOSA B, Optics Letters, and Optica. Optics Letters standard way using the \\label{} and \\ref{} commands.\nauthors and authors of Optica letters and memoranda\nmay use this template for a precise length estimate. Use A. Sample Figure\nthe shortarticle/true option for Optics Letters and short\n Figure 1 shows an example figure.\nOptica articles. © 2020 Optical Society of America\nhttp://dx.doi.org/10.1364/ao.XX.XXXXXX\n1. INTRODUCTION\nThis legacy template is designed to assist with creating an article\nto submit to Applied Optics, Advances in Optics and Photonics,\nJOSA A, JOSA B, Optics Letters or Optica. See the OSA’s Style\nGuide and Manuscript Templates pages for more details. Please\nselect the appropriate journal abbreviation (ao, aop, josaa, josab,\nol, optica) in the document preamble.\n Use the shortarticle/false option for Applied Optics, JOSA A,\nand JOSA B. 
Use the shortarticle/true option for Optics Letters.\nFor Advances in Optics and Photonics, use the shortarticle/false\noption for Review Articles, and the shortarticle/true option for\nTutorials.\n If you have a question while using this template on Overleaf,\nplease use the help menu (“?”) on the top bar to search for help\nor ask us a question using our contact form.\n2. EXAMPLES OF ARTICLE COMPONENTS\nThe sections below show examples of different article compo- Fig. 1. False-color image, where each pixel is assigned to one\nnents. of seven reference spectra.\n3. FIGURES AND TABLES\nIt is not necessary to place figures and tables at the back of the B. Sample Table\nmanuscript. Figures and tables should be sized as they are to Table 1 shows an example table.\n Letter Optica 2\nTable 1. Shape Functions for Quadratic Line Elements\n local node { N }m {Φi }m (i = x, y, z)\n m=1 L1 (2L1 − 1) Φi1\n m=2 L2 (2L2 − 1) Φi2\n m=3 L3 = 4L1 L2 Φi3\n4. SAMPLE EQUATION\nLet X1 , X2 , . . . , Xn be a sequence of independent and identically\ndistributed random variables with E[ Xi ] = µ and Var[ Xi ] =\nσ2 < ∞, and let\n n\n X1 + X2 + · · · + X n 1\n Sn =\n n\n =\n n ∑ Xi (1) Fig. 2. (a) Three traps create three rings of magnetic nanoparti-\n cles. (b) The rings interact with one another.\n i\ndenote their mean. √ Then as n approaches infinity, the ran-\n B. Sample Dataset Citation\ndom variables n(Sn − µ) converge in distribution to a normal\nN (0, σ2 ). 1. M. Partridge, \"Spectra evolution during coat-\n ing,\" figshare (2014) [retrieved 13 May 2015],\n http://dx.doi.org/10.6084/m9.figshare.1004612.\n5. SAMPLE ALGORITHM\n C. Sample Code Citation\nAlgorithms can be included using the commands as shown in\n 2. C. Rivers, \"Epipy: Python tools for epidemi-\nalgorithm 1.\n ology\" (Figshare, 2014) [retrieved 13 May 2015],\n http://dx.doi.org/10.6084/m9.figshare.1005064.\nAlgorithm 1. Euclid’s algorithm\n 1: procedure E UCLID(a, b) . The g.c.d. of a and b 6. 
FUNDING\n 2: r ← a mod b Content in the funding section will be generated entirely from\n 3: while r 6= 0 do . We have the answer if r is 0 details submitted to Prism. Authors may add placeholder text\n 4: a←b in the manuscript to assess length, but any text added to this\n 5: b←r section in the manuscript will be replaced during production and\n 6: r ← a mod b will display official funder names along with any grant numbers\n 7: return b . The gcd is b provided. If additional details about a funder are required, they\n may be added to the Acknowledgments, even if this duplicates\n information in the funding section. See the example below in\n Acknowledgements.\nA. Supplementary materials in OSA journals\nOSA journals allow authors to include supplementary materi- 7. ACKNOWLEDGMENTS\nals as integral parts of a manuscript. Such materials are sub-\nject to peer-review procedures along with the rest of the pa- Acknowledgments should be included at the end of the docu-\nper and should be uploaded and described using OSA’s Prism ment. The section title should not follow the numbering scheme\nmanuscript system. Please refer to the Author Guidelines for of the body of the paper. Additional information crediting indi-\nSupplementary Materials in OSA Journals for more detailed viduals who contributed to the work being reported, clarifying\ninstructions on labeling supplementary materials and your who received funding from a particular source, or other infor-\nmanuscript. Visualizations, Data Files, Datasets, and Code must mation that does not fit the criteria for the funding block may\nbe associated with a figure, table, or equation, OR be referenced also be included; for example, “K. Flockhart thanks the National\nin the results section of the manuscript. Science Foundation for help identifying collaborators for this\n work.”\n Authors may also include Supplemental Documents (PDF\ndocuments with expanded descriptions or methods) with the\nprimary manuscript. 
At this time, supplemental PDF files are 8. DISCLOSURES\nnot accepted for partner titles, JOCN and Photonics Research. Disclosures should be listed in a separate section at the end of\nTo reference the supplementary document, the statement “See the manuscript. List the Disclosures codes identified on OSA’s\nSupplement 1 for supporting content.” should appear at the Conflict of Interest policy page. If there are no disclosures, then\nbottom of the manuscript (above the References heading). Please list “The authors declare no conflicts of interest.”\nnote that to create text color for supplementary materials links, Here are examples of disclosures:\nuse of the command\n Disclosures. ABC: 123 Corporation (I,E,P), DEF: 456 Corpora-\n\\textcolor{urlblue}{Visualization 1} is preferred to using\n tion (R,S). GHI: 789 Corporation (C).\nthe command\n\\url{Visualization 1}. Disclosures. The authors declare no conflicts of interest.\n Letter Optica 3\n9. REFERENCES\nNote that Optics Letters and Optica short articles use an abbrevi-\nated reference style. Citations to journal articles should omit the\narticle title and final page number; this abbreviated reference\nstyle is produced automatically when the Optics Letters journal\noption is selected in the template, if you are using a .bib file for\nyour references.\n However, full references (to aid the editor and reviewers)\nmust be included as well on a fifth informational page that\nwill not count against page length; again this will be produced\nautomatically if you are using a .bib file.\nAdd citations manually or use BibTeX. See [1–5].\nREFERENCES\n1. Y. Zhang, S. Qiao, L. Sun, Q. W. Shi, W. Huang, L. Li, and Z. Yang,\n “Photoinduced active terahertz metamaterials with nanostructured vana-\n dium dioxide film deposited by sol-gel method,” Opt. Express 22,\n 11070–11078 (2014).\n2. Optical Society, “OSA Publishing,” http://www.osapublishing.org.\n3. P. Forster, V. Ramaswamy, P. Artaxo, T. Bernsten, R. Betts, D. Fahey,\n J. 
Haywood, J. Lean, D. Lowe, G. Myhre, J. Nganga, R. Prinn, G. Raga,\n M. Schulz, and R. V. Dorland, “Changes in atmospheric consituents\n and in radiative forcing,” in Climate Change 2007: The Physical Science\n Basis. Contribution of Working Group 1 to the Fourth assesment report\n of Intergovernmental Panel on Climate Change, S. Solomon, D. Qin,\n M. Manning, Z. Chen, M. Marquis, K. B. Averyt, M. Tignor, and H. L.\n Miler, eds. (Cambridge University Press, 2007).\n4. R. McKay, “X-ray crystallography,” Ph.D. thesis, Princeton University\n (1982).\n5. V. S. C. Manga Rao and S. Hughes, “Single quantum-dot Purcell factor\n and β factor in a photonic crystal waveguide,” Phys. Rev. B 75 (2007).\n"
# Strip every carriage return and line feed from the reference strings.
text1 <- gsub("[\r\n]", "", text1)
text2 <- gsub("[\r\n]", "", text2)
# paste(..., collapse = "") joins the FullText value into one string in case
# it holds more than one element.
input1 <- paste(metaMatrix[1, "FullText"], collapse = "")
input2 <- paste(metaMatrix[2, "FullText"], collapse = "")
# Apply the same line-break stripping to the observed text, then compare.
expect_equal(text1, gsub("[\r\n]", "", input1))
expect_equal(text2, gsub("[\r\n]", "", input2))
})
test_that("check fields that are NA", {
  # Columns asserted to contain nothing but NA in every row of metaMatrix.
  empty_fields <- c(
    "Title", "Year", "Month", "Day",
    "Authors", "Journal", "Volume", "Issue", "Pages",
    "CitedBy", "CitationPerYear",
    "Scopus-ID", "Publisher",
    "Affiliation", "Affiliation-City", "Affiliation-Country",
    "Abstract"
  )

  # Pull all listed columns at once and require every cell to be NA.
  field_values <- metaMatrix[, empty_fields]
  expect_true(all(is.na(field_values)))
})
# Add the following code to your website.
# For more information on customizing the embed code, read Embedding Snippets.