tests/testthat/test-sql_cooc.R

test_sql_cooc = function() {

  library(RSQLite)

  df_ehr = data.frame(Patient = c(1, 1, 2, 1, 2, 1, 1, 3, 4),
                      Month = c(1, 1, 1, 2, 2, 3, 3, 4, 4),
                      Parent_Code = c('C1', 'C2', 'C2', 'C1', 'C1', 'C1', 'C2',
                                      'C3', 'C4'),
                      Count = 1:9)

  test_db_path = tempfile()
  test_db = dbConnect(SQLite(), test_db_path)
  dbWriteTable(test_db, 'df_monthly', df_ehr, overwrite = TRUE)

  dbExecute(test_db, "CREATE INDEX patient_idx ON df_monthly (Patient)")

  df_uniq_codes = unique(df_ehr['Parent_Code'])
  dbWriteTable(test_db, 'df_uniq_codes', df_uniq_codes, overwrite = TRUE)

  dbDisconnect(test_db)

  output_db_path = tempfile()
  sql_cooc(test_db_path, output_db_path)

  test_db = dbConnect(SQLite(), output_db_path)
  df_spm_cooc = dbGetQuery(test_db, 'select * from df_monthly;')
  dbDisconnect(test_db)

  expect_equal(df_spm_cooc,
               data.frame(V1 = c('C1', 'C1', 'C2', 'C3', 'C4'),
                          V2 = c('C1', 'C2', 'C2', 'C3', 'C4'),
                          value = c(16, 7, 12, 8, 9)))




  # exclude patterns, codes_dict, autoindex, overwrite_output

  df_ehr$Parent_Code %<>% ifelse(. == 'C1', 'C0000545', .)
  df_ehr$Parent_Code %<>% ifelse(. == 'C2', 'C0000578', .)

  file.remove(test_db_path)
  test_db = dbConnect(SQLite(), test_db_path)
  dbWriteTable(test_db, 'df_monthly', df_ehr)

  dbDisconnect(test_db)


  codes_dict_fpaths = list.files(system.file('dictionaries', package = 'nlpembeds'),
                          full.names = TRUE)

  sql_cooc(test_db_path, output_db_path,
           exclude_code_pattern = 'C4',
           exclude_dict_pattern = 'C[0-9]',
           codes_dict_fpaths = codes_dict_fpaths,
           autoindex = TRUE, overwrite_output = TRUE)

  test_db = dbConnect(SQLite(), output_db_path)
  df_spm_cooc = dbGetQuery(test_db, 'select * from df_monthly;')
  dbDisconnect(test_db)

  expect_equal(df_spm_cooc,
               data.frame(V1 = c('C0000545', 'C0000545', 'C0000578'),
                          V2 = c('C0000545', 'C0000578', 'C0000578'),
                          value = c(16, 7, 12)))
}
test_that('sql_cooc', test_sql_cooc())




# debug


test_cooc_batch = function() {

  df_ehr_batch1 = data.frame(Month = c(1, 1, 1, 2, 2, 3, 3, 4, 4),
                      Patient = c(1, 1, 2, 1, 2, 1, 1, 3, 4),
                      Parent_Code = c('C1', 'C2', 'C2', 'C1', 'C1', 'C1', 'C2',
                                      'C3', 'C4'),
                      Count = 1:9)

  df_ehr_batch2 = data.frame(Month = c(1, 1, 1, 2, 2, 3, 3, 4, 4),
                      Patient = c(5, 5, 6, 5, 6, 5, 5, 7, 8),
                      Parent_Code = c('C5', 'C6', 'C6', 'C5', 'C5', 'C1', 'C2',
                                      'C3', 'C4'),
                      Count = 1:9)

  df_ehr = df_ehr_batch1
  df_ehr = df_ehr_batch2

  l_pat_ids = nlpembeds:::batch_splits(1:8, n_batch = 3)

  ## 

  df_cooc = data.frame(Var1 = c(rep('C1', 3), rep('C2', 4)),
                       Var2 = c(rep(c('C1', 'C2', 'C2'), 2), 'C3'),
                       value = 1:7)

##


  spm_cooc = spm_cooc_batch
  code_freqs = Matrix::diag(spm_cooc)
  expect_true(!any(is.na(code_freqs)))
  expect_true(!any(code_freqs == 0))
  # only appears when adding uniq_codes, expected for batches but not at end


}
#test_that('cooc_batch', test_cooc_batch())

Try the nlpembeds package in your browser

Any scripts or data that you put into this service are public.

nlpembeds documentation built on April 4, 2025, 4:41 a.m.