tests/testthat/test-textmodel_lsa.R

library("quanteda")

test_that("textmodel-lsa (rsvd) works as expected as lsa", {
    skip_if_not_installed("lsa")
    library("lsa")

    foxmatrix <- c(1, 0, 1, 1, 0, 1, 1, 0, 1, 0, 1,1)
    dim(foxmatrix) <- c(3, 4)
    rownames(foxmatrix) <- paste0("D", seq(1:3))
    #{lsa}
    foxlsaMatrix <- as.textmatrix(t(foxmatrix))
    #myMatrix <- lw_logtf(foxlsaMatrix) * gw_idf(foxlsaMatrix)

    myLSAspace <- suppressWarnings(lsa(foxlsaMatrix, dims = 2))

    #quanteda
    foxdfm <- as.dfm(foxmatrix)
    qtd_lsa <- textmodel_lsa(foxdfm, nd = 2)

    expect_equivalent(abs(qtd_lsa$docs), abs(myLSAspace$dk))
    expect_equivalent(abs(qtd_lsa$features), abs(myLSAspace$tk))
    expect_equivalent(abs(qtd_lsa$sk), abs(myLSAspace$sk))
})

test_that("predict works as expected as lsa::fold_in()", {
    skip_if_not_installed("lsa")
    library("lsa")

    foxmatrix <- c(1, 0, 1, 1, 0, 1, 1, 0, 1, 0, 1, 1)
    dim(foxmatrix) <- c(3, 4)
    rownames(foxmatrix) <- paste0("D", seq(1:3))

    newfox <- matrix(c(1, 0, 1, 0, 1, 1, 0, 0), nrow = 2, ncol = 4, byrow = TRUE)
    rownames(newfox) <- paste0("D", c(4:5))
    #{lsa}
    foxlsaMatrix <- as.textmatrix(t(foxmatrix))
    myLSAspace <- suppressWarnings(lsa(foxlsaMatrix, dims = 2))
    newSpace <- t(fold_in(t(newfox), myLSAspace))
    newSpace <- newSpace[ , ]
    #quanteda
    foxdfm <- as.dfm(foxmatrix)
    qtd_lsa <- textmodel_lsa(foxdfm, nd = 2)
    new_qtd_lsa <- predict(qtd_lsa, newfox)

    expect_equivalent(round(abs(new_qtd_lsa$matrix_low_rank), digits = 3), round(abs(newSpace), digits = 3))

})

test_that("textmodel-lsa (rsvd) works as expected as lsa::as.textmatrix()", {
    skip_if_not_installed("lsa")

    foxmatrix <- c(1, 0, 1, 2, 0, 1, 1, 0, 3, 0, 1, 1)
    dim(foxmatrix) <- c(3, 4)
    rownames(foxmatrix) <- paste0("D", seq(1:3))
    #{lsa}
    foxlsaMatrix <- as.textmatrix(t(foxmatrix))
    #myMatrix <- lw_logtf(foxlsaMatrix) * gw_idf(foxlsaMatrix)

    myLSAspace <- lsa(foxlsaMatrix, dims = 2)
    foxlsaMatrix_lowRank <- as.textmatrix(myLSAspace)
    foxlsaMatrix_lowRank <- foxlsaMatrix_lowRank[ , ]
    #quanteda
    foxdfm <- as.dfm(foxmatrix)
    qtd_lsa <- textmodel_lsa(foxdfm, nd = 2)

    expect_equivalent(round(abs(qtd_lsa$matrix_low_rank), 3), round(abs(t(foxlsaMatrix_lowRank)), 3))
})

test_that("textmodel-lsa works with margin argument", {
    ie_dfm <- dfm(tokens(data_corpus_irishbudget2010))
    ie_lsa1 <- textmodel_lsa(ie_dfm, margin = 'both')
    expect_equal(dim(ie_lsa1$matrix_low_rank), dim(ie_dfm))
    expect_true(is.dfm(as.dfm(ie_lsa1)))

    ie_lsa2 <- textmodel_lsa(ie_dfm, margin = 'documents')
    expect_equal(dim(ie_lsa2$matrix_low_rank), c(10, nfeat(ie_dfm)))
    expect_true(is.dfm(as.dfm(ie_lsa2)))

    ie_lsa3 <- textmodel_lsa(ie_dfm, margin = 'features')
    expect_equal(dim(ie_lsa3$matrix_low_rank), c(ndoc(ie_dfm), 10))
    expect_true(is.dfm(as.dfm(ie_lsa3)))
})

test_that("raises error when dfm is empty (#1419)",  {
    mx <- dfm_trim(data_dfm_lbgexample, 1000)
    expect_error(textmodel_lsa(mx),
                 quanteda.textmodels:::message_error("dfm_empty"))

})

Try the quanteda.textmodels package in your browser

Any scripts or data that you put into this service are public.

quanteda.textmodels documentation built on March 31, 2023, 8:09 p.m.