test-10-main.R
In EdSurvey: Analysis of NCES Education Survey and Assessment Data

require(testthat)
require(EdSurvey)
options(width = 500)
options(useFancyQuotes = FALSE)
options(digits = 7)

sdf <- readNAEP(system.file("extdata/data", "M36NT2PM.dat", package = "NAEPprimer"))
source("REF-0-main.R") # has REF output in it
# ideally this wouldn't trip up any of the scope fixes below
dsex <- "should not be used"

context("wd is set correctly") # When this fails all regression tests are invalid.
test_that("wd is set correctly", {
  skip_on_cran()
  expect_true(file.exists("test-10-main.R"))
})

context("Primer reads in correctly")
test_that("Primer reads in correctly", {
  expect_is(sdf, "edsurvey.data.frame")
  expect_equal(dim(sdf), c(17606, 303))
  expect_equal(c(nrow(sdf), ncol(sdf)), c(17606, 303))
})

context("[, [[, [<-, [[<- edsurvey.data.frame")
test_that("[, [[, [<-, [[<- edsurvey.data.frame", {
  sdf2 <- sdf
  testVal <- sdf[ , "iep"]
  #test index number
  expect_equal(sdf2[ , 6], testVal)
  expect_equal(sdf2[[6]], testVal)
  
  #when assigned here, the index shifts since it will be saved to the esdf 'cache' data.frame and that is first listed in 'colnames'
  #we should document or note this somewhere
  sdf2[ , 6] <- testVal
  expect_equal(sdf2[ , 2], sdf[[6]], check.attributes = FALSE) #factor re-assignment won't exactly match attributes
  
  #note the origial index was 6, it was changed to 2 from first re-assignment
  sdf2[[2]] <- testVal
  expect_equal(sdf2[[2]], sdf[ , 6], check.attributes = FALSE) #factor re-assignment won't exactly match attributes
  
  #test by column name
  expect_equal(sdf2[ , "iep"], testVal, check.attributes = FALSE)
  expect_equal(sdf2[["iep"]], testVal, check.attributes = FALSE)
  
  testVal <- sdf[["dsex"]]
  sdf2[ , "dsex"] <- testVal
  expect_equal(sdf2[ , "dsex"], sdf[["dsex"]], check.attributes = FALSE)
  
  sdf2[["dsex"]] <- testVal
  expect_equal(sdf2[ , "dsex"], sdf[["dsex"]], check.attributes = FALSE)
  
  expect_error(sdf2[ , 999999], "Column index out of range.*")
  expect_error(sdf2[ , "NOT A COLUMN ZZZZ"], "The following variable names are required for this call.*")
  
  expect_error(sdf2[[9999999]] <- testVal, "Column index out of range.*")
  expect_error(sdf2[["NOT A COLUMN ZZZ"]] <- testVal, "Cannot find.*as a column name or list item in this object")
})

context("$ assign")
test_that("$ assign", {
  # subset then assign
  sdfM <- subset(sdf, dsex == "Male")
  sdfM$books <- ifelse(sdfM$b013801 %in% c("0-10", "11-25"), "0-25 books", "26+ books")
  sdf$books <- ifelse(sdf$b013801 %in% c("0-10", "11-25"), "0-25 books", "26+ books")
  sdfM2 <- subset(sdf, dsex == "Male")
  tab <- table(sdfM$books, sdfM2$books)
  diag(tab) <- 0
  expect_equal(as.vector(tab), rep(0, 4))
  # assign a new variable
  sdf$a <- sdf$dsex
  tab <- table(sdf$a, sdf$dsex)
  expect_equal(tab, assignTableREF)
  # overwrite a variable, common for recoding
  # note: dsex has no NAs on the reporting sample
  sdf$dsex <- ifelse(sdf$dsex %in% "Male", "boy", "girl")
  tab2 <- table(sdf$a, sdf$dsex)
  expect_equal(unname(tab2), unname(assignTableREF))

  expect_warning(sdf$a[1:5] <- "invalid", "factor level")
  # repeated to be sure this does not throw an error, which it used to
  expect_warning(sdf$a[1:5] <- "invalid", "factor level")
  # reset sdf
  sdf <- readNAEP(system.file("extdata/data", "M36NT2PM.dat", package = "NAEPprimer"))
})

context("showPlausibleValues and showWeights verbose output agrees")
test_that("showPlausibleValues and showWeights verbose output agrees", {
  skip_on_cran()
  spv <- c(
    "There are 6 subject scale(s) or subscale(s) in this edsurvey.data.frame:",
    "'num_oper' subject scale or subscale with 5 plausible values.",
    "  The plausible value variables are: 'mrps11', 'mrps12', 'mrps13', 'mrps14', and 'mrps15'",
    "", "'measurement' subject scale or subscale with 5 plausible values.",
    "  The plausible value variables are: 'mrps21', 'mrps22', 'mrps23', 'mrps24', and 'mrps25'",
    "", "'geometry' subject scale or subscale with 5 plausible values.",
    "  The plausible value variables are: 'mrps31', 'mrps32', 'mrps33', 'mrps34', and 'mrps35'",
    "", "'data_anal_prob' subject scale or subscale with 5 plausible values.",
    "  The plausible value variables are: 'mrps41', 'mrps42', 'mrps43', 'mrps44', and 'mrps45'",
    "", "'algebra' subject scale or subscale with 5 plausible values.",
    "  The plausible value variables are: 'mrps51', 'mrps52', 'mrps53', 'mrps54', and 'mrps55'",
    "", "'composite' subject scale or subscale with 5 plausible values (the default).",
    "  The plausible value variables are: 'mrpcm1', 'mrpcm2', 'mrpcm3', 'mrpcm4', and 'mrpcm5'",
    ""
  )
  co <- capture.output(showPlausibleValues(sdf, verbose = TRUE))
  expect_equal(co, spv)
  skip_on_cran()
  sw <- c(
    "There is 1 full sample weight in this edsurvey.data.frame:",
    "  'origwt' with 62 JK replicate weights (the default).", "    Jackknife replicate weight variables associated with the full sample weight 'origwt':",
    "    'srwt01', 'srwt02', 'srwt03', 'srwt04', 'srwt05', 'srwt06', 'srwt07', 'srwt08', 'srwt09', 'srwt10', 'srwt11', 'srwt12', 'srwt13', 'srwt14', 'srwt15', 'srwt16', 'srwt17', 'srwt18', 'srwt19', 'srwt20', 'srwt21', 'srwt22', 'srwt23', 'srwt24', 'srwt25', 'srwt26', 'srwt27', 'srwt28', 'srwt29', 'srwt30', 'srwt31', 'srwt32', 'srwt33', 'srwt34', 'srwt35', 'srwt36', 'srwt37', 'srwt38', 'srwt39', 'srwt40', 'srwt41', 'srwt42', 'srwt43', 'srwt44',",
    "    'srwt45', 'srwt46', 'srwt47', 'srwt48', 'srwt49', 'srwt50', 'srwt51', 'srwt52', 'srwt53', 'srwt54', 'srwt55', 'srwt56', 'srwt57', 'srwt58', 'srwt59', 'srwt60', 'srwt61', and 'srwt62'",
    ""
  )
  withr::with_options(
    list(digits = 4, width = 500),
    co <- capture.output(showWeights(sdf, verbose = TRUE))
  )
  expect_equal(co, sw)
})

context("searchSDF")
test_that("searchSDF", {
  search1 <- searchSDF(string = c("home", "book"), data = sdf)
  search2 <- searchSDF(string = c("home|book"), data = sdf)
  search3 <- searchSDF(string = "value", data = sdf, levels = TRUE)

  # drop 'fileFormat' var for quick comparison, update the RDS when able
  search1$fileFormat <- NULL
  search2$fileFormat <- NULL
  search3$fileFormat <- NULL

  search2_ref <- structure(list(variableName = c("b013801", "b017001", "b017101", 
"b018201", "b017451", "t088804", "t088805", "t091503"), Labels = c("Books in home", 
"Newspaper in home", "Computer at home", "Language other than English spoken in home", 
"Talk about studies at home", "Computer activities: Use a gradebook program", 
"Computer activities: Post homework,schedule info", "G8Math:How often use Geometry sketchbook"
)), row.names = c(NA, 8L), class = "data.frame")
  search3_ref <- structure(list(variableName = c("m086101", "m020001", "m143601", 
"m142301"), Labels = c("Read value from graph", "Apply place value                            (R1)", 
"Solve for x given value of n", "Identify place value"), Levels = c("1. A; 2. B; 3. C *; 4. D; 5. E; 8. Omitted; 9. Not Reached; 0. Multiple", 
"1. Incorrect; 2. Correct*; 5. Illegible; 6. Off Task; 7. Non-Rateable; 8. Omitted; 9. Not Reached", 
"1. A; 2. B; 3. C; 4. D *; 5. E; 8. Omitted; 9. Not Reached; 0. Multiple", 
"1. A; 2. B; 3. C; 4. D; 5. E *; 8. Omitted; 9. Not Reached; 0. Multiple"
)), row.names = c("Student.131", "Student.165", "Student.177", "Student.214"), class = c("searchSDF", "data.frame"))
  expect_equal(search1, structure(list(variableName = "b013801", Labels = "Books in home"), row.names = 1L, class = "data.frame"))
  expect_equal(search2, search2_ref)
  expect_equal(search3, search3_ref)
})

context("showCodebook")
test_that("showCodebook", {
  cb <- showCodebook(sdf, "school")
  sdfRecode <- recode.sdf(sdf, recode = list(dsex = list(from = c("Male"), to = c("MALE"))))
  cb2 <- showCodebook(sdfRecode, c("student", "school"), labelLevels = FALSE, includeRecodes = TRUE)

  expect_known_value(cb, file = "showCodebook.rds", update = FALSE)
  expect_known_value(cb2, file = "showCodebookRecodes.rds", update = FALSE)
})

context("getData")
test_that("getData", {
  expect_known_value(head(gd1 <- getData(sdf, c("dsex", "b017451"))), file = "gd1.rds", update = FALSE)
  skip_on_cran()
  expect_known_value(head(gd2 <- getData(sdf, c("dsex", "b017451"), defaultConditions = FALSE)), file = "gd2.rds", update = FALSE)
  expect_known_value(head(gd3 <- getData(sdf, c("dsex", "b017451"), dropUnusedLevels = FALSE)), file = "gd3.rds", update = FALSE)
  expect_known_value(head(gd4 <- getData(sdf, c("dsex", "b017451"), dropOmittedLevels = TRUE, includeNaLabel = FALSE)), file = "gd4.rds", update = FALSE)
  expect_known_value(head(gd5 <- getData(sdf, c("dsex", "b017451"), dropOmittedLevels = TRUE, includeNaLabel = TRUE)), file = "gd5.rds", update = FALSE)
  gd6 <- getData(sdf, formula = composite ~ dsex + b017451)
  gd6 <- gd6[c(1:50, (nrow(gd6) - 50):nrow(gd6)), ] # this file was larger. slim down a bit.
  expect_known_value(gd6, file = "gd6.rds", update = FALSE, check.attributes = FALSE) # TEMPORARILY HAVE IT SKIP ATTRIBUTE CHECKS
  gddat <- getData(sdf,
    c("composite", "geometry", "dsex", "sdracem", "pared", "b017451", "origwt"),
    addAttributes = TRUE, dropOmittedLevels = FALSE
  )
  gddat <- gddat[c(1:50, (nrow(gddat) - 50):nrow(gddat)), ] # this file was larger. slim down a bit.
  attributes(gddat)$dataList$Student$lafObject <- NULL
  attributes(gddat)$dataList$School$lafObject <- NULL
  attributes(gddat)$fr2Path <- NULL
  attributes(gddat)$scoreCard <- NULL
  attributes(gddat)$dichotParamTab <- NULL
  attributes(gddat)$polyParamTab <- NULL
  attributes(gddat)$adjustedData <- NULL
  attributes(gddat)$scoreFunction <- NULL
  attributes(gddat)$testData <- NULL
  attributes(gddat)$scoreDict <- NULL
  attributes(gddat)$cacheDataLevelName <- NULL
  attributes(gddat)$dataList$Student$conflictLevels <- NULL
  attributes(gddat)$dataList$School$conflictLevels <- NULL
  expect_known_value(gddat, file = "gddat.rds", update = FALSE)

  expect_known_value(head(gd7 <- getData(sdf, c("dsex", "b017451"))), file = "gd7.rds", update = FALSE)
  expect_known_value(head(gd8 <- getData(sdf, c("dsex", "c052601"), dropUnusedLevels = FALSE)), file = "gd8.rds", update = FALSE) # schoolMergeVarStudent="scrpsu", schoolMergeVarSchool="sscrpsu"
  df2 <- getData(sdf, c("dsex", "b017451"),
    recode = list(
      b017451 = list(
        from = c(
          "Never or hardly ever",
          "Once every few weeks",
          "About once a week"
        ),
        to = c("Infrequently")
      ),
      b017451 = list(
        from = c(
          "2 or 3 times a week",
          "Every day"
        ),
        to = c("Frequently")
      )
    )
  )
  expect_known_value(head(df2), file = "df2.rds", update = FALSE)
  # use recode for both recodes
  df2B <- recode.sdf(sdf, recode = list(
    b017451 = list(
      from = c(
        "Never or hardly ever",
        "Once every few weeks",
        "About once a week"
      ),
      to = c("Infrequently")
    ),
    b017451 = list(
      from = c(
        "2 or 3 times a week",
        "Every day"
      ),
      to = c("Frequently")
    )
  ))
  df2C <- getData(df2B, c("dsex", "b017451"))
  expect_equal(df2, df2C)
  # use recode for just one and get data for the other
  df2D <- recode.sdf(sdf, recode = list(b017451 = list(
    from = c(
      "Never or hardly ever",
      "Once every few weeks",
      "About once a week"
    ),
    to = c("Infrequently")
  )))
  df2E <- getData(df2D, c("dsex", "b017451"),
    recode = list(b017451 = list(
      from = c(
        "2 or 3 times a week",
        "Every day"
      ),
      to = c("Frequently")
    ))
  )
  expect_equal(df2, df2E)
  # numeric recodes
  df3 <- getData(sdf,
    c("dsex", "b017451"),
    recode = list(
      b017451 = list(
        from = c(1, 2, 3),
        to = c("Infrequently")
      ),
      b017451 = list(
        from = c(4, 5),
        to = c("Frequently")
      )
    )
  )
  expect_equal(df2, df3) # recode by label and numeric agree

  sdf_males <- EdSurvey:::subset(sdf, dsex == "Male", verbose = FALSE)
  expect_equal(dim(sdf_males), c(8905, 303))

  sdf_males <- EdSurvey:::subset(sdf, dsex %in% "Male", verbose = FALSE)
  expect_equal(dim(sdf_males), c(8905, 303))

  # test bad subset, a$bb does not exist
  expect_error(EdSurvey:::subset(sdf, dsex %in% a$bb, verbose = FALSE))
  expect_error(EdSurvey:::subset(sdf, dsex %in% bb$a, verbose = FALSE))

  # test subset using an element from the parent frame
  # does not work in testthat with test()
  if (FALSE) {
    print(search())
    env <- new.env(hash = TRUE, parent = .GlobalEnv, size = 1L)
    assign("a", list(b = "Male"), envir = env)
    sdf_males <- with(env, EdSurvey:::subset(sdf, dsex %in% a$b, verbose = FALSE))
    expect_equal(dim(sdf_males), c(8905, 303))
  }
})

context("getData order of userConditions")
test_that("getData order of userConditions", {
  # subset first, then recode
  sdf_males <- subset(sdf, dsex %in% "Male")
  sdf_males <- recode.sdf(sdf_males, recode = list(dsex = list(from = "Male", to = "Boy")))
  gdat1 <- getData(sdf_males, c("dsex"))
  expect_equal(nrow(gdat1), 8486)
  expect_equal(unique(as.character(gdat1$dsex)), "Boy")

  # recode first then subset
  sdf_males <- recode.sdf(sdf, recode = list(dsex = list(from = "Male", to = "Boy")))
  sdf_males <- subset(sdf_males, dsex %in% "Boy")
  gdat2 <- getData(sdf_males, c("dsex"))
  expect_equal(gdat2, gdat1)
})

context("rename.sdf")
test_that("rename.sdf", {
  skip_on_cran()
  # check rename only
  sdf_rename <- rename.sdf(sdf, c("dsex", "composite", "origwt"), c("gender", "composite0", "totwgt"))
  expect_equal(attr(getAttributes(sdf_rename, "pvvars"), "default"), "composite0")
  expect_equal(attr(getAttributes(sdf_rename, "weights"), "default"), "totwgt")
  gDat1 <- getData(sdf, c("dsex", "composite", "origwt"))
  gDat2 <- getData(sdf_rename, c("gender", "composite0", "totwgt"))
  names(gDat1) <- NULL
  names(gDat2) <- NULL
  expect_equal(gDat1, gDat2)
  expect_equal(levelsSDF("dsex", sdf)$dsex, levelsSDF("gender", sdf_rename)$gender)

  # check rename along with subset and recode.sdf
  sdf_males <- subset(sdf, dsex %in% "Male")
  gDat1 <- getData(sdf_males, "dsex")
  sdf_males <- rename.sdf(sdf_males, "dsex", "gender")
  sdf_males <- recode.sdf(sdf_males, list(gender = list(from = "Male", to = "Boy")))
  gDat2 <- getData(sdf_males, "gender")
  expect_equal(as.numeric(table(gDat1$dsex)), as.numeric(table(gDat2$gender)))

  # complicated order of operations
  sdf_rename <- rename.sdf(sdf, "sdracem", "race")
  sdf_rename <- subset(sdf_rename, !race %in% c("Omitted"))
  sdf_rename <- rename.sdf(sdf_rename, "race", "race_recode")
  sdf_rename <- recode.sdf(sdf_rename, recode = list(race_recode = list(
    from = c("Hispanic", "Asian/Pacific Island", "Amer Ind/Alaska Natv"),
    to = "Other"
  )))
  sdf_rename <- subset(sdf_rename, race_recode %in% "Other")
  expect_equal(nrow(sdf_rename), nrow(subset(sdf, sdracem %in% c("Hispanic", "Asian/Pacific Island", "Amer Ind/Alaska Natv", "Other"))))
})

context("subset throws an error")
test_that("subset throws an error", {
  expect_error(sdf_error <- subset(sdf, dsex1 %in% "Male"))
  expect_error(sdf_error <- subset(sdf, dsex == "Male" & dsex1 == "Male"))
})

context("lm.sdf")
lm1 <- lm.sdf(~ dsex + b017451, sdf)
lm1$data <- NULL
lm1$call <- NULL

test_that("lm.sdf", {
  skip_on_cran()
  expect_equal(lapply(lm1, head), lm1_head)

  lm1S <- lm.sdf(~ dsex + b017451, sdf, standardizeWithSamplingVar = TRUE)
  withr::with_options(
    list(digits = 4),
    slm1Scoef <- capture.output(summary(lm1S, src = TRUE)$coefmat)
  )
  expect_equal(slm1Scoef, stdCoefREF)

  lm10 <- lm.sdf(composite ~ dsex + b017451, sdf)
  lm10B <- lm.sdf(composite ~ dsex + b017451, sdf, weightVar = "origwt")
  lm10C <- lm.sdf(composite ~ dsex + b017451, sdf, weightVar = origwt)
  lm10D <- lm.sdf(composite ~ dsex + b017451, sdf, weightVar = c("origwt"))
  expect_equal(coef(lm10), coef(lm10B))
  expect_equal(coef(lm10), coef(lm10C))
  expect_equal(coef(lm10), coef(lm10D))

  lm10 <- lm.sdf(composite ~ dsex + b017451, sdf)
  lm10$data <- NULL
  lm10$call <- NULL
  expect_equal(lapply(lm10, head), lm10_head)
  lm1f <- lm.sdf(composite ~ dsex + b017451, sdf, relevels = list(dsex = "Female"))
  lm1f$data <- NULL
  lm1f$residuals <- head(lm1f$residuals)
  lm1f$PV.residuals <- head(lm1f$PV.residuals)
  lm1f$PV.fitted.values <- head(lm1f$PV.fitted.values)
  expect_known_value(lapply(lm1f, head), "lm1f.rds", update = FALSE)
  lm1re <- lm.sdf(composite ~ dsex + b017451, sdf, recode = list(dsex = list(from = "Male", to = "MALE")))
  lm1re$data <- NULL
  lm1re$residuals <- head(lm1re$residuals)
  lm1re$PV.residuals <- head(lm1re$PV.residuals)
  lm1re$PV.fitted.values <- head(lm1re$PV.fitted.values)
  expect_known_value(lapply(lm1re, head), "lm1re.rds", update = FALSE)
  # test that lfactor levels can be used in relevels argument
  lm1f2 <- lm.sdf(composite ~ dsex + b017451, sdf, jrrIMax = 1, relevels = list(dsex = 2))
  lm1f2$data <- NULL
  lm1f2$residuals <- head(lm1f2$residuals)
  lm1f2$PV.residuals <- head(lm1f2$PV.residuals)
  lm1f2$PV.fitted.values <- head(lm1f2$PV.fitted.values)
  # calls will not be equal
  lm1f$call <- lm1f2$call <- NULL
  expect_equal(lm1f, lm1f2)
  expect_equal(summary(lm1f), summary(lm1f2))
})

context("lm.sdf Taylor series")

expect_is(
  sdf_taylor <- lm.sdf(composite ~ sdracem + dsex + pared,
    subset(sdf, pared == 1 | pared == 2, verbose = FALSE),
    weightVar = "origwt",
    varMethod = "Taylor",
    jrrIMax = Inf
  ),
  "edsurveyLm"
)
expect_is(
  sdf_taylor <- lm.sdf(composite ~ sdracem + dsex + pared,
    data = subset(sdf, pared == 1 | pared == 2, verbose = FALSE),
    weightVar = "origwt",
    varMethod = "Taylor",
    jrrIMax = Inf
  ),
  "edsurveyLm"
)
sdf_taylorWV <- lm.sdf(composite ~ sdracem + dsex + pared,
  subset(sdf, pared == 1 | pared == 2, verbose = FALSE),
  weightVar = origwt,
  varMethod = "Taylor",
  jrrIMax = Inf
)
sdf_taylorWV$call <- sdf_taylor$call <- NULL
expect_equal(sdf_taylor, sdf_taylorWV)
lm1t <- lm.sdf(composite ~ dsex + b017451, sdf, varMethod = "Taylor")
lm1t$data <- NULL
lm1t$residuals <- head(lm1t$residuals)

lm1t$PV.residuals <- head(lm1t$PV.residuals)
lm1t$PV.fitted.values <- head(lm1t$PV.fitted.values)
expect_known_value(lapply(lm1t, head), "lm1t.rds", update = FALSE)
lm1jk <- lm.sdf(composite ~ dsex + b017451, sdf, varMethod = "jackknife")
expect_equal(coef(lm1t), coef(lm1jk))
lm1jk$data <- NULL
lm1jk$residuals <- head(lm1jk$residuals)
lm1jk$PV.residuals <- head(lm1jk$PV.residuals)
lm1jk$PV.fitted.values <- head(lm1jk$PV.fitted.values)
lm1jk <- summary(lm1jk, src = TRUE)
lm1jk.ref <- lm1
lm1jk.ref$formula <- NULL
lm1jk$call <- NULL
lm1jk$formula <- NULL
lm1jk$coefmat$stdCoef <- NULL
lm1jk$coefmat$stdSE <- NULL
expect_equal(lapply(lm1jk, head), lapply(lm1jk.ref, head))
# estimates should agree too
test_that("lm.sdf Taylor series", {
  skip_on_cran()

  lm2ta <- lm.sdf(composite ~ dsex + sdracem + yrsmath, sdf, varMethod = "Taylor")
  lm2jka <- lm.sdf(composite ~ dsex + sdracem + yrsmath, sdf, varMethod = "jackknife")
  # check only estimates
  expect_equal(coef(lm2ta), coef(lm2jka))

  lm2t <- lm.sdf(composite ~ dsex + sdracem + yrsmath, sdf, relevels = list(dsex = "Female"), varMethod = "Taylor")
  lm2t$data <- NULL
  lm2t$residuals <- head(lm2t$residuals)
  lm2t$PV.residuals <- head(lm2t$PV.residuals)
  lm2t$PV.fitted.values <- head(lm2t$PV.fitted.values)
  expect_known_value(lm2t, "lm2t.rds", update = FALSE)
  lm2jk <- lm.sdf(composite ~ dsex + sdracem + yrsmath, sdf, varMethod = "jackknife", relevel = list(dsex = "Female"), jrrIMax = 1)
  lm2jk$data <- NULL
  lm2jk$residuals <- head(lm2jk$residuals)
  lm2jk$PV.residuals <- head(lm2jk$PV.residuals)
  lm2jk$PV.fitted.values <- head(lm2jk$PV.fitted.values)
  lm2jk$call <- NULL
  expect_known_value(lm2jk, "lm2.rds", update = FALSE)
  expect_equal(coef(lm2t), coef(lm2jk))
})


context("edsurveyTable")
test_that("edsurveyTable", {
  skip_on_cran()
  # two levels, results checked vs Primer
  es1 <- edsurveyTable(composite ~ dsex + b017451, sdf, jrrIMax = 1)
  es1c <- capture.output(es1)
  expect_equal(es1c, es1REF)
  # test no LHS variable
  es10 <- edsurveyTable(~ dsex + b017451, sdf, jrrIMax = 1)
  es10c <- capture.output(es10)
  expect_equal(es10c, es1REF)

  # check for just males (dsex is only occupied at one level)
  sdfm <- subset(sdf, dsex == "Male", verbose = FALSE)
  es2 <- edsurveyTable(composite ~ dsex + b017451, sdfm, jrrIMax = Inf)
  es2c <- capture.output(es2)
  expect_equal(es2c, es2REF)
  # test dropOmittedLevels
  es2b <- edsurveyTable(composite ~ dsex + b017451, sdfm, jrrIMax = 1, dropOmittedLevels = FALSE)
  es2bc <- capture.output(es2b)
  expect_equal(es2bc, es2bREF)

  # test unbalanced tables, this check verified. See email from Ting on 10/1/2015 at 5:25 Eastern
  es3 <- edsurveyTable(composite ~ lep + ell3, sdf, jrrIMax = 1)
  es3c <- capture.output(es3)
  expect_equal(es3c, es3REF)
  # check return.means and return.sepct arguments
  es3b <- edsurveyTable(composite ~ lep + ell3, sdf, jrrIMax = 1, returnMeans = FALSE)
  expect_equal(es3b$data, es3b$data[, c("lep", "ell3", "N", "WTD_N", "PCT", "SE(PCT)")])
  es3c <- edsurveyTable(composite ~ lep + ell3, sdf, jrrIMax = 1, returnMeans = FALSE, returnSepct = FALSE)
  expect_equal(es3c$data, es3$data[, c("lep", "ell3", "N", "WTD_N", "PCT")])
  # test unbalanced tables, with three levels. This check verified. See email from Ting on 10/1/2015 at 5:25 Eastern
  es4 <- edsurveyTable(composite ~ lep + ell3 + dsex, sdf, jrrIMax = 1)
  es4c <- capture.output(es4)
  expect_equal(es4c, es4REF)
  # test LHS variable that isn't composite
  es11 <- edsurveyTable(b017451 ~ dsex, sdf)
  es11c <- capture.output(es11)
  expect_equal(es11c, es11REF1)
  # recode
  es1r <- edsurveyTable(composite ~ dsex + b017451, sdf, jrrIMax = 1, recode = list(dsex = list(from = "Male", to = "MALE")))
  es1rc <- capture.output(es1r)
  expect_equal(es1rc, es1rREF)
  # test omitted levels problematic sparseness case
  suppressWarnings(es5 <- edsurveyTable(composite ~ dsex + b017451 + b003501, data = sdf, dropOmittedLevels = FALSE, pctAggregationLevel = 2))
  es5c <- capture.output(es5)
  expect_equal(es5c, es5REF)
})

context("edsurveyTable with N=0")
test_that("edsurveyTable with N=0", {
  skip_on_cran()
  expect_warning(es0 <- edsurveyTable(~ b003501 + m815401, data = sdf, dropOmittedLevels = FALSE))
  esDF <- getData(sdf, c("b003501", "m815401"), dropOmittedLevels = FALSE)
  esDFtable <- as.data.frame(table(esDF))
  esDFtable <- esDFtable[order(esDFtable$b003501), ]
  expect_equal(nrow(es0$data), nrow(expand.grid(levels(esDF$m815401), levels(esDF$b003501))))
  expect_equal(es0$data$N, esDFtable$Freq)
  expect_equal(capture.output(es0), estwith0REF)

  expect_warning(es1 <- edsurveyTable(~ b003501 + m815401, data = sdf, dropOmittedLevels = FALSE, pctAggregationLevel = 0))
  expect_equal(nrow(es1$data), nrow(esDFtable))
})

context("edsurveyTable with no rhs variable")
test_that("edsurveyTable with no rhs variable", {
  skip_on_cran()
  es <- edsurveyTable(composite ~ 1, data = sdf, returnVarEstInputs = TRUE)
  esc <- capture.output(es)
  expect_equal(esc, es_norhsREF)
})


context("gap with N=0")
test_that("gap with N=0", {
  skip_on_cran()
  gap0 <- gap("b003501",
    data = sdf,
    groupA = m815401 %in% "Multiple",
    groupB = "default",
    dropOmittedLevels = FALSE, targetLevel = "Multiple",
    returnSimpleDoF = TRUE, returnVarEstInputs = TRUE,
    returnSimpleN = TRUE
  )
  gap0c <- capture.output(gap0)
  expect_equal(gap0c, gap0REF)
})

context("edsurveyTable2pdf")
test_that("edsurveyTable2pdf", {
  skip_on_cran()
  est1 <- edsurveyTable(composite ~ dsex + b017451, sdf)
  expect_equal(
    capture.output(edsurveyTable2pdf(
      data = est1,
      formula = b017451 ~ dsex,
      toCSV = "",
      filename = "CONSOLE",
      returnMeans = FALSE
    )),
    pdf_estREF
  )
})

context("edsurveyTable Taylor")
test_that("edsurveyTable Taylor", {
  skip_on_cran()
  es1t <- edsurveyTable(composite ~ dsex + b017451, sdf, jrrIMax = 1, varMethod = "Taylor")
  es1tc <- capture.output(es1t)
  # compare Taylor to jackknife, only columns that should agree
  es1j <- edsurveyTable(composite ~ dsex + b017451, sdf, jrrIMax = 1, varMethod = "jackknife")
  es1t$njk <- NULL
  es1j$njk <- NULL
  es1t$varMethod <- NULL
  es1j$varMethod <- NULL
  es1j$data["SE(PCT)"] <- NULL
  es1t$data["SE(PCT)"] <- NULL
  es1j$data["SE(MEAN)"] <- NULL
  es1t$data["SE(MEAN)"] <- NULL
  expect_equal(es1j, es1t)

  # check Taylor output
  expect_equal(es1tc, es1tREF)
  # test unbalanced tables, note some singleton PSUs
  expect_warning(es3t <- edsurveyTable(composite ~ lep + ell3, sdf, jrrIMax = Inf, varMethod = "Taylor"))
  es3tc <- capture.output(es3t)
  expect_equal(es3tc, es3tREF)
  # test unbalanced tables
  expect_warning(es4t <- edsurveyTable(composite ~ lep + ell3 + dsex, sdf, jrrIMax = 1, varMethod = "Taylor"))
  es4tc <- capture.output(es4t)
  expect_equal(es4tc, es4tREF)

  # check for just males (dsex is only occupied at one level)
  sdfm <- subset(sdf, dsex == "Male", verbose = FALSE)
  es2t <- edsurveyTable(composite ~ dsex + b017451, sdfm, jrrIMax = 1, varMethod = "Taylor")
  es2tc <- capture.output(es2t)
  expect_equal(es2tc, es2tREF)
})

test_that("variable label stored as attributes", {
  est1 <- edsurveyTable(composite ~ dsex + b017451, sdf, jrrIMax = 1)
  expect_equal(attr(est1$data$dsex, "label"), "Gender")
  expect_equal(attr(est1$data$b017451, "label"), "Talk about studies at home")
})

context("showCutPoints agrees")
test_that("showCutPoints agrees", {
  skip_on_cran()
  sw <- c(
    "Achievement Levels:",
    "  Mathematics:  262, 299, 333"
  )
  co <- capture.output(showCutPoints(sdf))
  expect_equal(sw, co)
})

context("updatePlausibleValue")
test_that("updatePlausibleValue", {
  skip_on_cran()
  lma <- lm.sdf(~dsex, sdf, varMethod = "Taylor")
  sdfb <- updatePlausibleValue("composite", "newname", sdf)
  lmb <- lm.sdf(~dsex, sdfb, varMethod = "Taylor")
  lmb$call <- lma$call <- NULL # the call is different
  lmb$formula <- lma$formula <- NULL # the formula has the default value substituted in and so is different
  lmb$data <- lma$data <- NULL
  expect_equal(lma, lmb)
})

context("percentile")
test_that("percentile", {
  skip_on_cran()
  expect_known_value(pct1 <- percentile("composite", c(0, 1, 25, 50, 75, 99, 100), sdf), "pct1.rds", update = FALSE)
  # percentiles should be -1* their value when multiplied by -1 and taken in reverse order
  tmpDat <- getData(data = sdf, varnames = c("composite", "origwt"), addAttributes = TRUE)
  pvs <- getPlausibleValue(data = sdf, "composite")
  for (pvi in pvs) {
    tmpDat[, pvi] <- -1 * tmpDat[, pvi]
  }
  pct1i <- percentile("composite", rev(c(0, 1, 25, 50, 75, 99, 100)), data = tmpDat, confInt = FALSE)
  expect_equal(pct1$estimate, -1 * pct1i$estimate)
  # range should agree with 0th and 100th percentile
  pct0 <- percentile("mrpcm1", c(0, 100), data = sdf)
  expect_equal(pct0$estimate, range(sdf$mrpcm1))
})

######################## GAP TESTS FAIL ################
context("return VarEstInputs")
lm1 <- lm.sdf(~ dsex + b017451, sdf, returnVarEstInputs = TRUE)
expect_known_value(lm1$varEstInputs, "lm_varest.rds", update = FALSE)
test_that("return VarEstInputs", {
  skip_on_cran()
  es1 <- edsurveyTable(composite ~ dsex + b017451, sdf, jrrIMax = 1, returnVarEstInputs = TRUE)
  expect_known_value(list(es1$meanVarEstInputs, es1$pctVarEstInputs), file = "est_varest.rds", update = FALSE)
  g1 <- gap("composite", sdf, dsex == "Male", returnVarEstInputs = TRUE)
  expect_known_value(list(g1$varEstInputs, g1$pctVarEstInputs), file = "gap1_varest.rds", update = FALSE)
  g2 <- gap("b017451", sdf, dsex == "Male", targetLevel = "Once every few weeks", returnVarEstInputs = TRUE)
  expect_known_value(list(g2$varEstInputs, g2$pctVarEstInputs), file = "gap2_varest.rds", update = FALSE)
  g3 <- gap("composite", sdf, dsex == "Male", returnVarEstInputs = TRUE, achievementLevel = c("At or aboVe Bas"))
  expect_known_value(list(g3$varEstInputs, g3$pctVarEstInputs), file = "gap3_varest.rds", update = FALSE)
  g3d <- gap("composite", sdf, dsex == "Male", returnVarEstInputs = TRUE, achievementLevel = c("At Basic"), achievementDiscrete = TRUE)
  expect_known_value(list(g3d$varEstInputs, g3d$pctVarEstInputs), file = "gap3d_varest.rds", update = FALSE)
  g4 <- gap("composite", sdf, dsex == "Male", percentiles = c(50), returnVarEstInputs = TRUE)
  expect_known_value(list(g4$varEstInputs, g4$pctVarEstInputs), file = "gap4_varest.rds", update = FALSE)
  pct1 <- percentile("composite", c(2, 50), sdf, returnVarEstInputs = TRUE)
  expect_known_value(attributes(pct1)$varEstInputs, file = "pct_varest.rds", update = FALSE)
  test1 <- achievementLevels(returnCumulative = TRUE, data = sdf, returnVarEstInputs = TRUE)
  expect_known_value(list(test1$discVarEstInputs, test1$cumVarEstInputs), file = "aLevels_varest.rds", update = FALSE)
  es1re <- edsurveyTable(composite ~ dsex + b017451, sdf, jrrIMax = 1, returnVarEstInputs = TRUE, recode = list(dsex = list(from = "Male", to = "MALE")))
  expect_known_value(list(es1re$meanVarEstInputs, es1re$pctVarEstInputs), file = "est_varest_recode.rds", update = FALSE)
  # add test per issue 671
  glab <- gap("composite", sdf, dsex == "Male", dsex == "Female", achievementLevel = c("below Basic", "At Bas"), achievementDiscrete = TRUE)
  expect_equal(glab$results$achievementLevel, c("below Basic", "At Basic"))
})

context("gap")
test_that("gap", {
  # gap SD
  expect_known_value(g0 <- gap("composite", sdf, dsex == "Male", dsex == "Female", returnSimpleDoF = TRUE, stDev = TRUE), "gap_main_SD.rds", update = FALSE)
  # gap means
  expect_known_value(g1 <- gap("composite", sdf, dsex == "Male", dsex == "Female", returnSimpleDoF = TRUE), "gap_main_mean.rds", update = FALSE)
  g1q <- gap("composite", sdf, "dsex==\"Male\"", "dsex==\"Female\"", returnSimpleDoF = TRUE)
  g1q$call <- g1$call # the call is different, so fix that
  expect_known_value(g1q, "gap_main_mean.rds", update = FALSE)
  skip_on_cran()
  # gap percentile
  expect_known_value(g2p <- gap("composite", sdf, dsex == "Male", dsex == "Female", percentile = c(0, 50, 98), returnSimpleDoF = TRUE), "gap_main_percentile.rds", update = FALSE)
  g2pq <- gap("composite", sdf, "dsex==\"Male\"", "dsex==\"Female\"", percentile = c(0, 50, 98), returnSimpleDoF = TRUE)
  g2pq$call <- g2p$call # the call is different, so fix that
  expect_known_value(g2pq, "gap_main_percentile.rds", update = FALSE)
  # gap achievement levels, discrete
  expect_known_value(g1al <- gap("composite", sdf, dsex == "Male", dsex == "Female", achievementLevel = "Prof", achievementDiscrete = TRUE), "gap_AL1.rds", update = FALSE)

  # check use of achievementLevel in results, that results agree across discrete and cumulative
  ga1 <- gap("composite", sdf, dsex == "Male", dsex == "Female", achievementLevel = "Advanced")
  ga2 <- gap("composite", sdf, dsex == "Male", dsex == "Female", achievementLevel = "Adv", achievementDiscrete = TRUE)
  ga1$call <- NULL
  ga2$call <- NULL
  expect_equal(ga1, ga2)
  expect_equal(ga1$results$achievementLevel, "At Advanced")
  expect_equal(ga2$results$achievementLevel, "At Advanced")

  gp1 <- gap("composite", sdf, dsex == "Male", dsex == "Female", achievementLevel = "Proficient")
  gp2 <- gap("composite", sdf, dsex == "Male", dsex == "Female", achievementLevel = "Prof", achievementDiscrete = TRUE)

  # cumulative should equal sum of discrete
  expect_equal(gp2$results$estimateA + ga1$results$estimateA, gp1$results$estimateA)
  expect_equal(gp2$results$estimateB + ga1$results$estimateB, gp1$results$estimateB)
  expect_equal(gp1$results$achievementLevel, "At or Above Proficient")
  expect_equal(gp2$results$achievementLevel, "At Proficient")

  # and cumulative, with multiple levels
  expect_known_value(g2al <- gap("composite", sdf, dsex == "Male", dsex == "Female", achievementLevel = c("Proficient", "Basic")), "gap_AL2.rds", update = FALSE)
  # gap percentage with recode
  expect_known_value(g1eq <- gap("b017451", sdf, dsex == "Male", dsex == "Female", targetLevel = "Never or hardly ever", returnSimpleDoF = TRUE), "gap_percentage1.rds", update = FALSE)
  expect_known_value(g2eq <- gap("b017451", sdf, dsex == "Male", dsex == "Female",
    targetLevel = "Infrequently",
    recode = list(b017451 = list(
      from = c(
        "Never or hardly ever",
        "Once every few weeks",
        "About once a week"
      ),
      to = c("Infrequently")
    )),
    returnSimpleDoF = TRUE
  ), "gap_percentage2.rds", update = FALSE)
  # Taylor warning
  expect_warning(gap("composite", sdf, dsex == "Male", dsex == "Female", varMethod = "Taylor"), "deprecated")
})


context("achievementLevel basic")
test_that("achievementLevel basic", {
  expect_known_value(test1 <- achievementLevels(returnCumulative = TRUE, data = sdf), file = "aLevels_test1.rds", update = FALSE)
})

context("achievementLevel, aggregated")
test_that("achievementLevel, aggregated", {
  skip_on_cran()
  expect_known_value(test2 <- achievementLevels(aggregateBy = "dsex", returnCumulative = TRUE, data = sdf), file = "aLevels_test2.rds", update = FALSE)
  # test dynamic vars
  expect_known_value(test2 <- achievementLevels(aggregateBy = dsex, returnCumulative = TRUE, data = sdf), file = "aLevels_test2.rds", update = FALSE)
  dsexVar <- "dsex"
  expect_known_value(test2 <- achievementLevels(aggregateBy = dsexVar, returnCumulative = TRUE, data = sdf), file = "aLevels_test2.rds", update = FALSE)
  skip_on_cran()
  # return to achievementLevels
  expect_known_value(test3 <- achievementLevels(aggregateBy = c("sdracem"), returnCumulative = TRUE, data = sdf), file = "aLevels_test3.rds", update = FALSE)
  expect_known_value(test4 <- achievementLevels("sdracem", aggregateBy = c("composite"), data = sdf, returnCumulative = TRUE), file = "aLevels_test4.rds", update = FALSE)
  expect_known_value(test5 <- achievementLevels("dsex", aggregateBy = c("composite"), data = sdf, returnCumulative = TRUE), file = "aLevels_test5.rds", update = FALSE)
  # Use recode to change values for specified variables:
  expect_known_value(test6 <- achievementLevels(c("composite", "dsex", "b017451"),
    aggregateBy = "dsex", sdf,
    recode = list(
      b017451 = list(
        from = c(
          "Never or hardly ever",
          "Once every few weeks",
          "About once a week"
        ),
        to = c("Infrequently")
      ),
      b017451 = list(
        from = c("2 or 3 times a week", "Every day"),
        to = c("Frequently")
      )
    )
  ), file = "aLevels_test6.rds", update = FALSE)
})

context("achievementLevel many interactions")
test_that("achievementLevel many interactions", {
  skip_on_cran()
  expect_known_value(test7 <- achievementLevels(c("composite", "ell3", "lep", "pared", "b017451"), data = sdf, returnCumulative = TRUE), file = "aLevels_test7.rds", update = FALSE)

  # iparse tests for passing different variable names
  xCols <- c("lep", "pared", "b017451")
  test7a <- achievementLevels(c("composite", "ell3", xCols), data = sdf, returnCumulative = TRUE)
  test7b <- achievementLevels(c(composite, ell3, xCols), data = sdf, returnCumulative = TRUE)
  expect_equal(test7, test7a)
  expect_equal(test7, test7b)
  expect_equal(test7a, test7b)
})

context("achievementLevel with result of zero")
test_that("achievementLevel with result of zero", {
  sdfC <- subset(sdf, scrpsu %in% 100:200)
  expect_known_value(test9 <- achievementLevels(data = subset(sdfC, pared == 1)), file = "aLevels_test9.rds", update = FALSE)
})

# tests based on sdf
context("Test correlations on SDF")
test_that("sdf correlation", {
  suppressMessages(expect_is(expect_c1_pear <- cor.sdf("b017451", "b003501", sdf, method = "Pearson", weightVar = "origwt"), "edsurveyCor"))
  skip_on_cran()
  expect_is(c1_spear <- cor.sdf("b017451", "b003501", sdf, method = "Spearman", weightVar = "origwt"), "edsurveyCor")
  # use dynamic variables
  assign(x = "b17", value = c("b017451"), envir = globalenv())
  assign(x = "b35", value = c("b003501"), envir = globalenv())
  expect_is(c1_polyc <- cor.sdf(b17, b35, sdf, method = "Polychoric", weightVar = "origwt"), "edsurveyCor") # takes awhile
  rm("b17", envir = globalenv())
  rm("b35", envir = globalenv())
  expect_is(c1_polycB <- cor.sdf(b017451, b003501, sdf, method = "Polychoric", weightVar = "origwt"), "edsurveyCor") # takes awhile
  expect_equal(c1_polyc, c1_polycB)

  sdf_dnf <- EdSurvey:::subset(sdf, b003601 == 1, verbose = FALSE)
  suppressMessages(expect_is(c2_pear <- cor.sdf("composite", "b017451", sdf_dnf, method = "Pearson", weightVar = "origwt"), "edsurveyCor"))
  expect_is(c2_spear <- cor.sdf("composite", "b017451", sdf_dnf, method = "Spearman", weightVar = "origwt"), "edsurveyCor")
  expect_is(c2_polys <- cor.sdf("composite", "b017451", sdf_dnf, method = "Polyserial", weightVar = "origwt"), "edsurveyCor")
})

context("In cor, variables as class character return errors")
test_that("In cor, variables as class", {
  skip_on_cran()
  df <- getData(sdf, c("b017451", "sdracem", "origwt"), addAttributes = TRUE)
  df$sdracem <- as.character(df$sdracem)
  expect_error(cor.sdf("b017451", "sdracem", df, method = "Pearson"))
})

context("Reordering a variable manually vs through cor.sdf")
test_that("Reordering a variable manually vs through cor.sdf", {
  skip_on_cran()
  gddat <- getData(sdf, c("b017451", "sdracem", "origwt"), addAttributes = TRUE)
  # cast sdracem as a character and then reorder based on that
  gddat$sdracem <- lfactor(as.character(gddat$sdracem), levels = c(1, 2, 3, 4, 5, 6), labels = c("White", "Hispanic", "Black", "Asian/Pacific Island", "Amer Ind/Alaska Natv", "Other"))
  cor3 <- cor.sdf("b017451", "sdracem", sdf, method = "Pearson", weightVar = "origwt", reorder = list(sdracem = c("White", "Hispanic", "Black", "Asian/Pacific Island", "Amer Ind/Alaska Natv", "Other")))
  cor4 <- cor.sdf("b017451", "sdracem", gddat, method = "Pearson", weightVar = "origwt")
  expect_equal(cor3, cor4)

  gddat$sdracem[gddat$sdracem == "Hispanic"] <- "White"
  gddat$sdracem <- factor(gddat$sdracem, levels = c("White", "Black", "Asian/Pacific Island", "Amer Ind/Alaska Natv", "Other"))
  cor1Pe <- cor.sdf("b017451", "sdracem", sdf, method = "Pearson", weightVar = "origwt", recode = list(sdracem = list(from = "Hispanic", to = "White")))
  cor2Pe <- cor.sdf("b017451", "sdracem", gddat, method = "Pearson", weightVar = "origwt")
  expect_equal(cor1Pe, cor2Pe)
  # here lfacor not condensed
  cor3Pe <- cor.sdf("b017451", "sdracem", sdf, method = "Pearson", weightVar = "origwt", recode = list(sdracem = list(from = "Hispanic", to = "White")), condenseLevels = FALSE)
  expect_equal(cor3Pe$correlation, -0.00299202137069835)
  #
  cor4Pe <- cor.sdf("b017451", "sdracem", gddat, method = "Pearson", weightVar = "origwt", condenseLevels = FALSE)
  expect_equal(cor4Pe$correlation, -0.00232480033396954)

  cor1Sp <- cor.sdf("b017451", "sdracem", sdf, method = "Spearman", weightVar = "origwt", recode = list(sdracem = list(from = "Hispanic", to = "White")))
  cor2Sp <- cor.sdf("b017451", "sdracem", gddat, method = "Spearman", weightVar = "origwt")
  expect_equal(cor1Sp, cor2Sp)

  cor1pc <- cor.sdf("b017451", "sdracem", sdf, method = "Polychoric", weightVar = "origwt", recode = list(sdracem = list(from = "Hispanic", to = "White")))
  cor2pc <- cor.sdf("b017451", "sdracem", gddat, method = "Polychoric", weightVar = "origwt")
  expect_equal(cor1pc, cor2pc)

  sdf_dnf <- EdSurvey:::subset(sdf, sdracem == 5 | sdracem == 3 | sdracem == 1, verbose = FALSE)
  cc1_pear <- cor.sdf("b017451", "sdracem", sdf_dnf, method = "Pearson", weightVar = "origwt", recode = list(sdracem = list(from = 3, to = 1)))
  cc2_pear <- cor.sdf("b017451", "sdracem", sdf_dnf, method = "Pearson", weightVar = "origwt", recode = list(sdracem = list(from = "Hispanic", to = "White")))
  expect_equal(cc1_pear, cc2_pear)
  cc1_spear <- cor.sdf("b017451", "sdracem", sdf_dnf, method = "Spearman", weightVar = "origwt", recode = list(sdracem = list(from = 3, to = 1)))
  cc2_spear <- cor.sdf("b017451", "sdracem", sdf_dnf, method = "Spearman", weightVar = "origwt", recode = list(sdracem = list(from = "Hispanic", to = "White")))
  expect_equal(cc1_spear, cc2_spear)
  cc1_polyc <- cor.sdf("b017451", "sdracem", sdf_dnf, method = "Polychoric", weightVar = "origwt", recode = list(sdracem = list(from = 3, to = 1)))
  cc2_polyc <- cor.sdf("b017451", "sdracem", sdf_dnf, method = "Polychoric", weightVar = "origwt", recode = list(sdracem = list(from = "Hispanic", to = "White")))
  expect_equal(cc1_polyc, cc2_polyc)
})

context("cor.sdf no level condensation")
test_that("cor.sdf no level condensation", {
  skip_on_cran()
  cor_nocondense <- cor.sdf(x = "c046501", y = "c044006", data = sdf, condenseLevels = FALSE)
  withr::with_options(
    list(digits = 4),
    cor_nocondenseC <- capture.output(cor_nocondense)
  )
  expect_equal(cor_nocondenseC, cor_nocondenseREF)
})

context("unweighted cor")
test_that("unweighted cor", {
  skip_on_cran()
  b1a <- cor.sdf("m815401", "b017451", method = "Pearson", sdf, weightVar = "origwt")
  b1b <- cor.sdf("m815401", "b017451", method = "Pearson", sdf, weightVar = NULL)
  # not actually equal, just approximate
  expect_equal(b1a$correlation, b1b$correlation, tolerance = 0.02, scale = 1)

  b2a <- cor.sdf("m815401", "b017451", method = "Spearman", sdf, weightVar = "origwt")
  b2b <- cor.sdf("m815401", "b017451", method = "Spearman", sdf, weightVar = NULL)
  # not actually equal, just approximate
  expect_equal(b2a$correlation, b2b$correlation, tolerance = 0.02, scale = 1)
})


context("glm")
test_that("glm", {
  skip_on_cran()
  # data to test against
  logitDat <- getData(data = sdf, varnames = c("iep", "b017451", "dsex", "b013801", "origwt", "composite", "geometry"), dropOmittedLevels = FALSE, addAttributes = TRUE)

  # test logit with no PVs
  logit0 <- logit.sdf(I(iep %in% "Yes") ~ dsex + b013801, data = sdf)
  # run just coef
  logitDat0 <- getData(data = logitDat, varnames = c("iep", "dsex", "b013801", "origwt"), dropOmittedLevels = TRUE)
  logitDat0$iepY <- ifelse(logitDat0$iep %in% "Yes", 1, 0)
  suppressWarnings(ccoef <- coef(glm.sdf(iepY ~ dsex + b013801, data = logitDat0, weightVar = "origwt", family = binomial(link = "logit"))))
  expect_equal(coef(logit0), ccoef)

  logit1 <- logit.sdf(I(composite > 300) ~ dsex + b013801, data = sdf)
  logitDat1 <- getData(data = logitDat, varnames = c("composite", "dsex", "b013801", "origwt"), dropOmittedLevels = TRUE)
  ccoef <- sapply(getPlausibleValue("composite", sdf), function(ci) {
    logitDat1$outcome <- ifelse(logitDat1[, ci] > 300, 1, 0)
    suppressWarnings(coef(glm.sdf(outcome ~ dsex + b013801, data = logitDat1, weightVar = "origwt", family = binomial(link = "logit"))))
  })
  ccoef <- apply(ccoef, 1, mean)
  expect_equal(coef(logit1), ccoef, tolerance = 1E-6)

  # test logit with PVs, multiple tests on that PV
  logit1b <- logit.sdf(I(composite > 300 & composite < 350) ~ dsex + b013801, data = sdf)
  ccoef <- sapply(getPlausibleValue("composite", sdf), function(ci) {
    logitDat1$outcome <- ifelse(logitDat1[, ci] > 300 & logitDat1[, ci] < 350, 1, 0)
    suppressWarnings(coef(glm.sdf(outcome ~ dsex + b013801, data = logitDat1, weightVar = "origwt", family = binomial(link = "logit"))))
  })
  ccoef <- apply(ccoef, 1, mean)
  expect_equal(coef(logit1b), ccoef, tolerance = 1E-6)

  # two survey items
  logit2 <- logit.sdf(I(iep %in% "Yes" & b017451 %in% "Every day") ~ dsex + b013801, data = sdf)
  logitDat2 <- getData(data = logitDat, varnames = c("iep", "b017451", "dsex", "b013801", "origwt"), dropOmittedLevels = TRUE)
  logitDat2$outcome <- ifelse(logitDat2$iep %in% "Yes" & logitDat2$b017451 %in% "Every day", 1, 0)
  suppressWarnings(ccoef <- coef(glm.sdf(outcome ~ dsex + b013801, data = logitDat2, weightVar = "origwt", family = binomial(link = "logit"))))
  expect_equal(coef(logit2), ccoef, tolerance = 1E-6)

  # a PV with a relation and a survey item
  logit3 <- logit.sdf(I(composite > 300 & b017451 %in% "Every day") ~ dsex + b013801, data = sdf)
  logitDat3 <- getData(data = logitDat, varnames = c("composite", "dsex", "b017451", "b013801", "origwt"), dropOmittedLevels = TRUE)
  ccoef <- sapply(getPlausibleValue("composite", sdf), function(ci) {
    logitDat3$outcome <- ifelse(logitDat3[, ci] > 300 & logitDat3$b017451 %in% "Every day", 1, 0)
    suppressWarnings(coef(glm.sdf(outcome ~ dsex + b013801, data = logitDat3, weightVar = "origwt", family = binomial(link = "logit"))))
  })
  ccoef <- apply(ccoef, 1, mean)
  expect_equal(coef(logit3), ccoef, tolerance = 1E-6)

  # a relation between two PVs
  logit4 <- logit.sdf(I(composite > geometry) ~ dsex + b013801, data = sdf)
  logitDat4 <- getData(data = logitDat, varnames = c("composite", "geometry", "dsex", "b013801", "origwt"), dropOmittedLevels = TRUE)
  ccoef <- sapply(1:5, function(ii) {
    ci <- getPlausibleValue("composite", sdf)[ii]
    gi <- getPlausibleValue("geometry", sdf)[ii]
    logitDat4$outcome <- ifelse(logitDat4[, ci] > logitDat4[, gi], 1, 0)
    suppressWarnings(coef(glm.sdf(outcome ~ dsex + b013801, data = logitDat4, weightVar = "origwt", family = binomial(link = "logit"))))
  })
  ccoef <- apply(ccoef, 1, mean)
  expect_equal(coef(logit4), ccoef, tolerance = 1E-6)

  # two PVs with seperate relations
  logit4b <- logit.sdf(I(composite > 300 & geometry < 350) ~ dsex + b013801, data = sdf)
  ccoef <- sapply(1:5, function(ii) {
    ci <- getPlausibleValue("composite", sdf)[ii]
    gi <- getPlausibleValue("geometry", sdf)[ii]
    logitDat4$outcome <- ifelse(logitDat4[, ci] > 300 & logitDat4[, gi] < 350, 1, 0)
    suppressWarnings(coef(glm.sdf(outcome ~ dsex + b013801, data = logitDat4, weightVar = "origwt", family = binomial(link = "logit"))))
  })
  ccoef <- apply(ccoef, 1, mean)
  expect_equal(coef(logit4b), ccoef, tolerance = 1E-6)

  skip_on_cran()
  # test logit with PVs
  withr::with_options(
    list(digits = 4),
    co <- capture.output(summary(logit1))
  )
  expect_equal(co, logit1REF)
  logit2t <- logit.sdf(I(iep %in% "Yes" & b017451 %in% "Every day") ~ dsex + b013801, data = sdf, varMethod = "Taylor")
  co <- capture.output(summary(logit2t))
  expect_equal(co, logit2tREF)
})

context("Wald test")
test_that("Wald test", {
  # glm example
  skip_on_cran()
  myLogit <- logit.sdf(dsex ~ b017451 + b003501, data = sdf, returnNumberOfPSU = TRUE)
  wt_glm <- waldTest(model = myLogit, coefficients = 2:5)
  wt1 <- capture.output(wt_glm)
  expect_equal(wt1, wt1REF)

  # lm example
  fit <- lm.sdf(composite ~ dsex + b017451, data = sdf, returnNumberOfPSU = TRUE)
  wt_lm <- waldTest(model = fit, coefficients = "b017451")
  wt2 <- capture.output(wt_lm)
  expect_equal(wt2, wt2REF)

  # test weightVar with iparse (remove call as they won't match, but numbers will)
  fita <- lm.sdf(composite ~ dsex + b017451, data = sdf, weightVar = "origwt", returnNumberOfPSU = TRUE)
  fitb <- lm.sdf(composite ~ dsex + b017451, data = sdf, weightVar = origwt, returnNumberOfPSU = TRUE)
  fit$call <- NULL
  fita$call <- NULL
  fitb$call <- NULL
  expect_equal(fit, fita) # compare to original
  expect_equal(fit, fitb)
  expect_equal(fita, fitb) # compare to each other

  # lm example with Taylor
  fit <- lm.sdf(composite ~ dsex + b017451, data = sdf, varMethod = "Taylor", returnNumberOfPSU = TRUE)
  wt_lm <- waldTest(model = fit, coefficients = "b017451")
  wt3 <- capture.output(wt_lm)
  expect_equal(wt3, wt3REF)

  # lesdf example should return warning for waldTest because strata and PSU vars needed
  # used to be an error, but chi-sq is still possible, so now warning
  gddat <- getData(
    data = sdf,
    varnames = c("composite", "dsex", "b017451", "b003501", "origwt"),
    addAttributes = TRUE
  )
  myLogit2 <- logit.sdf(dsex ~ b017451 + b003501, data = gddat, returnVarEstInputs = TRUE)
  expect_warning(waldTest(myLogit2, coefficients = 2:5))

  # lesdf this example should work, including the PSU and stratum vars
  gddat <- getData(
    data = sdf,
    varnames = c("composite", "dsex", "b017451", "b003501", "origwt", getPSUVar(sdf), getStratumVar(sdf)),
    addAttributes = TRUE
  )
  myLogit <- logit.sdf(dsex ~ b017451 + b003501, data = gddat, returnVarEstInputs = TRUE)
  wt_lesdf <- waldTest(myLogit, coefficients = 2:5)
  wt4 <- capture.output(wt_lesdf)
  expect_equal(wt4, wt4REF)
})

context("edsurvey with $ method")
test_that("edsurvey with $ method", {
  # $ method work for existing attributes
  expect_equal(sdf$country, "USA")
  # $ method return a vector
  expect_equal(class(sdf$dsex), c("lfactor", "factor"))
  # that vector has data
  dsexTab <- structure(c(Male = 8486L, Female = 8429L),
    .Dim = 2L,
    .Dimnames = structure(list(c("Male", "Female")), .Names = ""),
    class = "table"
  )
  expect_equal(table(sdf$dsex), dsexTab)

  # $ method return a data.frame
  expect_equal(class(sdf$composite), "data.frame")
})


context("levelsSDF n")
test_that("levelsSDF n", {
  skip_on_cran()
  levelRes <- levelsSDF(varnames = "pared", data = sdf)
  sum2Res <- summary2(sdf, "pared")
  mergeRes <- merge(sum2Res$summary, levelRes$pared, by.x = "pared", by.y = "labels")
  expect_equal(mergeRes$N, mergeRes$n)
  sum2ResB <- summary2(sdf, c("dsex", "pared"))
  withr::with_options(
    list(digits = 2),
    co <- capture.output(sum2ResB)
  )
  expect_equal(co, sum2ResBREF)
})


context("levelsSDF with multiple recodes")
test_that("levelsSDF with multiple recodes", {
  skip_on_cran()
  # $ method work for existing attributes
  df <- recode.sdf(sdf, recode = list(
    t088301 = list(
      from = c("Yes, available", "Yes, I have access"),
      to = c("Yes")
    ),
    t088301 = list(
      from = c("No, have no access"),
      to = c("No")
    )
  ))
  df <- recode.sdf(df, recode = list(pared = list(
    from = c("Did not finish H.S.", "Graduated H.S."),
    to = c("Graduated High School")
  )))

  levelsSDFoutput <- c(
    "Levels for Variable 't088301' (Lowest level first):",
    "    8. Omitted* (n = 84)",
    "    0. Multiple* (n = 1)",
    "    9. Yes (n = 14683)",
    "    10. No (n = 323)",
    "    NOTE: * indicates an omitted level."
  )
  colsdf <- capture.output(levelsSDF("t088301", df))
  expect_equal(levelsSDFoutput, colsdf)
})

context("use returnNumberOfPSU")
test_that("use returnNumberOfPSU", {
  skip_on_cran()
  # percentile
  pctPSU <- percentile("composite", percentiles = c(10, 50), data = sdf, returnNumberOfPSU = TRUE)
  expect_equal(attr(pctPSU, "nPSU"), 124)
  # lm.sdf
  lmPSU <- lm.sdf(composite ~ dsex, data = sdf, returnNumberOfPSU = TRUE)
  expect_equal(lmPSU$nPSU, 124)
  # gap
  gapPSU <- gap("composite", data = sdf, groupA = dsex %in% "Male", groupB = dsex %in% "Female", returnNumberOfPSU = TRUE)
  expect_equal(capture.output(gapPSU), gapPSUREF)
})


context("summary2")
test_that("summary2", {
  # Weighted with PV
  skip_on_cran()
  sPV_w <- capture.output(summary2(sdf, c("composite", "algebra")))
  expect_equal(sPV_w, sPV_wREF)

  # Unweighted
  sPV <- capture.output(summary2(sdf, "composite", weightVar = NULL))
  expect_equal(sPV, sPVREF)

  # Weighted discrete
  withr::with_options(
    list(digits = 2),
    sDiscrete_w <- capture.output(summary2(sdf, c("b017451", "dsex")))
  )
  expect_equal(sDiscrete_w, sDiscrete_wREF)

  # Unweighted discrete
  withr::with_options(
    list(digits = 4),
    sDiscrete <- capture.output(summary2(sdf, "dsex", weightVar = NULL))
  )
  expect_equal(sDiscrete, sDiscreteREF)
})

context("rq.sdf")
test_that("rq.sdf", {
  skip_on_cran()
  options(width = 500)
  rq1 <- rq.sdf(composite ~ dsex + b017451, data = sdf, tau = 0.8)
  rq1c <- withr::with_options(list(digits = 2), capture.output(summary(rq1)))
  expect_equal(rq1c, rq1REF)
})

context("mml.sdf")
test_that("mml.sdf", {
  skip_on_cran()
  # run subtest
  invisible(withr::with_options(
    list(digits = 4),
    capture.output(
      mmlNAEP <- suppressWarnings(mml.sdf(algebra ~ 1, subset(sdf, dsex == "Female"), weightVar = "origwt", verbose = TRUE))
    )
  ))
  # capture output
  # intercept
  coInt <- withr::with_options(list(digits = 4), capture.output(mmlNAEP))
  expect_equal(coInt, mmlIntREF)
  # summary
  coSum <- withr::with_options(
    list(digits = 4),
    capture.output(summary(mmlNAEP))
  )
  # don't compare the 'iterations = ##' value, remove that from the comparison
  expect_equal(dropIterations(coSum), dropIterations(mmlSumREF))

  # ensure all test names match pvvar names and vis-versa
  testNames <- c(sdf$testData$subtest, sdf$testData$test)
  pvs <- names(sdf$pvvars)
  expect_true(all(pvs %in% testNames) && all(testNames %in% pvs))
  skip_on_cran()

  # test with a fixed up pvvar name
  invisible(withr::with_options(
    list(digits = 4),
    co <- capture.output(
      suppressWarnings(
        mml.sdf(data_anal_prob ~ 1, sdf, weightVar = "origwt", verbose = TRUE)
      )
    )
  ))

  coREF <- c(
    "  (Intercept) Population SD ",
    "       279.84         40.98 "
  )
  expect_equal(co, coREF)

  # run with regressor
  invisible(withr::with_options(
    list(digits = 4),
    capture.output(
      mmlDsexNaep <- suppressWarnings(mml.sdf(algebra ~ dsex, sdf, weightVar = "origwt", verbose = TRUE))
    )
  ))
  # capture output
  # intercept
  coDsexInt <- withr::with_options(
    list(digits = 2),
    capture.output(mmlDsexNaep)
  )

  expect_equal(coDsexInt, mmlDsexIntREF)
})

context("no PSU var error and warnings")
test_that("no PSU var error and warnings", {
  skip_on_cran()
  # these warnings relatete to missing PSU so the count of PSUs will not be returned or
  # errors about how Taylor series is not possible without a PSU var
  expect_warning(
    sdfNoPSU <- edsurvey.data.frame(
      userConditions = sdf$userConditions,
      defaultConditions = sdf$defaultConditions,
      dataList = sdf$dataList,
      weights = sdf$weights,
      pvvars = sdf$pvvars,
      subject = sdf$subject,
      year = sdf$year,
      assessmentCode = sdf$assessmentCode,
      dataType = sdf$dataType,
      gradeLevel = sdf$gradeLevel,
      achievementLevels = sdf$achievementLevels,
      omittedLevels = sdf$omittedLevels,
      survey = sdf$survey,
      country = sdf$country,
      psuVar = NULL, # remove the PSU var for testing
      stratumVar = NULL, # remove the stratum var for testing
      jkSumMultiplier = sdf$jkSumMultiplier,
      recodes = sdf$recodes,
      validateFactorLabels = sdf$validateFactorLabels,
      forceLower = TRUE,
      reqDecimalConversion = sdf$reqDecimalConversion,
      fr2Path = sdf$fr2Path,
      dim0 = sdf$dim0
    ), "Taylor series"
  )
  expect_warning(rq1 <- rq.sdf(composite ~ dsex + b017451, data = sdfNoPSU, tau = 0.8, returnNumberOfPSU = TRUE), "returnNumberOfPSU")
  expect_error(res <- edsurveyTable(composite ~ dsex, data = sdfNoPSU, varMethod = "Taylor"), "jackknife")
  expect_error(res <- glm.sdf(composite ~ dsex, data = sdfNoPSU, varMethod = "Taylor", family = binomial()), "primary sampling unit")
  expect_warning(res <- glm.sdf(composite ~ dsex, data = sdfNoPSU, family = binomial(), returnNumberOfPSU = TRUE), "FALSE")
  expect_warning(res <- lm.sdf(composite ~ dsex, data = sdfNoPSU, returnNumberOfPSU = TRUE), "FALSE")
  expect_error(res <- lm.sdf(composite ~ dsex, data = sdfNoPSU, varMethod = "Taylor"), "jackknife")
  expect_error(res <- lm.sdf(composite ~ dsex, data = sdfNoPSU, varMethod = "T", returnNumberOfPSU = TRUE), "jackknife")
})
Any scripts or data that you put into this service are public.
EdSurvey documentation built on June 27, 2024, 5:10 p.m.
rdrr.io home R language documentation Run R code online
CRAN packages Bioconductor packages R-Forge packages GitHub packages
Note that we can't provide technical support on individual packages. You should contact the package authors for that.
EdSurvey
Analysis of NCES Education Survey and Assessment Data

tests/testthat/test-10-main.R
In EdSurvey: Analysis of NCES Education Survey and Assessment Data

Try the EdSurvey package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

EdSurvey Analysis of NCES Education Survey and Assessment Data

tests/testthat/test-10-main.R In EdSurvey: Analysis of NCES Education Survey and Assessment Data

Try the EdSurvey package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

EdSurvey
Analysis of NCES Education Survey and Assessment Data

tests/testthat/test-10-main.R
In EdSurvey: Analysis of NCES Education Survey and Assessment Data