inst/tests/testDataExtraction.R

context("Extracting data from a html document")

doc <- "<html>
<head></head>
<body>
<div id='player1' class='player'>
<span class='name'>Mike</SPAN>
<span class='level'>10</span>
<a href='http://someurl.com'>Complete profile</a>
</div>
<div id='player2' class='player'>
<span class='name'>Stan</span>
<a href='http://someurl2.com'>Complete profile</a> </div>
<div id='player3' class='player'>
<span class='name'>Bruce</span>
<span class='level'>21</span>
<a href='http://someurl3.com'>Complete profile</a>
  </div>
<DIV CLASS='TEST' name='Hello world'>insensitive</DIV>
<DIV id='test_2' CLASS='test_2' name='test 2'>test 2</DIV>
<DIV id='test-3' CLASS='test-3 lastTest' name='test 3' test-attr='glop'>test 3</DIV>
</body>
</html>"

doc <- htmlParse(doc)

test_that("Function cssApply extracts desired data.", {
  expect_equal(cssApply(doc, ".player>.name", cssCharacter), 
               c("Mike", "Stan", "Bruce"))
  expect_equal(cssApply(doc, "#player1>.name", cssCharacter), 
               "Mike")
  expect_equal(cssApply(doc, ".player>a", cssLink), 
               c("http://someurl.com", 
                 "http://someurl2.com", 
                 "http://someurl3.com"))
  expect_equal(cssApply(doc, ".player>.level", cssNumeric), 
               c(10, 21))
  expect_equal(cssApply(doc, "*[id]", cssId),
               c("player1", "player2", "player3", "test_2", "test-3"))
  expect_equal(cssApply(doc, "*[id class]", cssId),
               c("player1", "player2", "player3", "test_2", "test-3"))
  expect_equal(cssApply(doc, ".test-3#test-3", cssCharacter),
               "test 3")
  expect_equal(cssApply(doc, "#test-3.test-3", cssCharacter),
               "test 3")
  expect_equal(cssApply(doc, "#test-3.lasttest", cssCharacter),
               "test 3")
})

test_that("cssApplyInNodeSet works correctly", {
  expect_equal(cssApplyInNodeSet(doc, ".player", ".level", cssNumeric),
               list(10, NA, 21))
})

test_that("functions are case insensitive", {
  expect_equal(cssApply(doc, "div.TEST", cssCharacter), 
               "insensitive")
  expect_equal(cssApply(doc, "DIV.test", cssCharacter), 
               "insensitive")
  expect_equal(cssApply(doc, ".test[CLASS]", cssCharacter),
               "insensitive")
  expect_equal(cssApply(doc, "div.test", cssCharacter),
               "insensitive")
})

test_that("functions are insensitive to extra spaces", {
  expect_equal(cssApply(doc, ".player> .name", cssCharacter), 
               c("Mike", "Stan", "Bruce"))
  expect_equal(cssApply(doc, ".player >.name", cssCharacter), 
               c("Mike", "Stan", "Bruce"))
  expect_equal(cssApply(doc, ".player > .name", cssCharacter), 
               c("Mike", "Stan", "Bruce"))
  expect_equal(cssApply(doc, "*[ id]", cssId),
               c("player1", "player2", "player3", "test_2", "test-3"))
  expect_equal(cssApply(doc, "*[id ]", cssId),
               c("player1", "player2", "player3", "test_2", "test-3"))
  expect_equal(cssApply(doc, "*[ id='player1' ]", cssId),
               c("player1"))
  expect_equal(cssApply(doc, "*[ id = 'player1' ]", cssId),
               c("player1"))
  expect_equal(cssApply(doc, "*[ name = 'Hello world' ]", cssCharacter),
               "insensitive")
})

test_that("functions are insensitive to valid punctuations", {
  expect_equal(cssApply(doc, ".test_2", cssCharacter), 
               c("test 2"))
  expect_equal(cssApply(doc, ".test-3", cssCharacter), 
               c("test 3"))
  expect_equal(cssApply(doc, "#test-3", cssCharacter), 
               c("test 3"))
  expect_equal(cssApply(doc, "*[test-attr='glop']", cssCharacter), 
               c("test 3"))
  expect_equal(cssApply(doc, "*[test-attr]", cssCharacter), 
               c("test 3"))
})

context("Using pseudo classes")

doc <- "<html>
<head></head>
<body>
  <div id='content'>
    <h1>first child</h1>
    <p>second child</p>
    <p>third child</p>
    <h1>fourth child</h1>
  </div>
</body>
</html>"

doc <- htmlParse(doc)

test_that("pseudo classes work", {
  expect_equal(cssApply(doc, "p:nth-child(2)", cssCharacter), "second child")
  expect_equal(cssApply(doc, "h1:first-child", cssCharacter), "first child")
  expect_equal(cssApply(doc, "h1:last-child", cssCharacter), "fourth child")
  expect_equal(cssApply(doc, "p:first-child", cssCharacter), list())
  expect_equal(cssApply(doc, "p:last-child", cssCharacter), list())
  expect_equal(cssApply(doc, "p:nth-last-child(2)", cssCharacter), "third child")
  
  expect_equal(cssApply(doc, "p:nth-of-type(2)", cssCharacter), "third child")
  expect_equal(cssApply(doc, "p:first-of-type", cssCharacter), "second child")
  expect_equal(cssApply(doc, "p:last-of-type", cssCharacter), "third child")
  expect_equal(cssApply(doc, "p:nth-last-of-type(1)", cssCharacter), "third child")
})
cuche27/CSS documentation built on Oct. 20, 2019, 1:09 a.m.