tests/testthat/test-cooc.R

# in first month 2 patients, 2 codes, one twice
# second two patients, one code
# third one patient 2 codes
# devtools::load_all()
df_ehr = data.frame(Month = c(1, 1, 1, 2, 2, 3, 3, 4, 4),
                    Patient = c(1, 1, 2, 1, 2, 1, 1, 3, 4),
                    Parent_Code = c('C1', 'C2', 'C2', 'C1', 'C1', 'C1', 'C2',
                                    'C3', 'C4'),
                    Count = 1:9)
spm_cooc = build_df_cooc(df_ehr)

test_build_df_cooc = function() {

  expect_true(inherits(spm_cooc, 'sparseMatrix'))

  # in diagonal, no NAs, no 0s
  code_freqs = Matrix::diag(spm_cooc)
  expect_true(!any(is.na(code_freqs)))
  expect_true(!any(code_freqs == 0))

  # in coocs, none above the diag
  above_diag = sapply(seq_along(code_freqs), function(idx) {
      any(spm_cooc[, idx] > code_freqs[idx])
    })
  expect_true(!any(above_diag))

  # try with uniq_codes

df_ehr_batch1 = data.frame(Month = c(1, 1, 1, 2, 2, 3, 3, 4, 4),
                    Patient = c(1, 1, 2, 1, 2, 1, 1, 3, 4),
                    Parent_Code = c('C1', 'C2', 'C2', 'C1', 'C1', 'C1', 'C2',
                                    'C3', 'C4'),
                    Count = 1:9)

df_ehr_batch2 = data.frame(Month = c(1, 1, 1, 2, 2, 3, 3, 4, 4),
                    Patient = c(5, 5, 6, 5, 6, 5, 5, 7, 8),
                    Parent_Code = c('C5', 'C6', 'C6', 'C5', 'C5', 'C1', 'C2',
                                    'C3', 'C4'),
                    Count = 1:9)

df_ehr_batch3 = data.frame(Month = c(1, 1, 1, 2, 2, 3, 3, 4, 4),
                    Patient = c(5, 5, 6, 5, 6, 5, 5, 7, 8),
                    Parent_Code = c('C5', 'C6', 'C6', 'C5', 'C5', 'C1', 'C2',
                                    'C7', 'C8'),
                    Count = 1:9)

uniq_codes = paste0('C', 1:8)

spm_cooc_batch = build_df_cooc(df_ehr_batch1, uniq_codes, min_code_freq = 0)
spm_cooc_batch = spm_cooc_batch + build_df_cooc(df_ehr_batch2, uniq_codes, min_code_freq = 0)
spm_cooc_batch + build_df_cooc(df_ehr_batch3, uniq_codes, min_code_freq = 0)
# there's your error

# also just with 
build_df_cooc(df_ehr_batch3, uniq_codes)

# only affects code freqs, not code coocs
df_ehr_batch1 = data.frame(Month = c(1, 1, 1, 2, 2, 3, 3, 4, 4),
                    Patient = c(1, 1, 2, 1, 2, 1, 1, 3, 4),
                    Parent_Code = c('C7', 'C8', 'C8', 'C7', 'C7', 'C7', 'C8',
                                    'C3', 'C4'),
                    Count = 1:9)
build_df_cooc(df_ehr_batch1, uniq_codes)


}
test_that('build_df_cooc', test_build_df_cooc())

test_build_df_cooc_cuis = function() {

  # with cuis

  # devtools::load_all()

    #5 x 5 sparse Matrix of class "dgCMatrix"
    #       BACITRACIN C0000726 C0000729 C0000731 C0000737
#BACITRACIN         15        8        1        1        2
#C0000726            .      707       16       27       79
#C0000729            .        .       19        3        5
#C0000731            .        .        .       31       10
#C0000737            .        .        .        .      110


  df_ehr = get(load('df_ehr_10_patients_cuis.rds'))

  #library(profvis)
  #p = profvis({ df_cooc = build_df_cooc(df_ehr) })
  #htmlwidgets::saveWidget(p, file = 'profiling.html')

  system.time({
      df_cooc = build_df_cooc(df_ehr, n_cores = 3)
    })


  spm_cooc = df_cooc
  code_freqs = Matrix::diag(spm_cooc)
  expect_true(!any(is.na(code_freqs)))
  expect_true(!any(code_freqs == 0))

}
#test_that('build_df_cooc_cuis', test_build_df_cooc_cuis())

test_spm_to_df = function() {

#  obj = "structure(list(V1 = c(\"C1\", \"C1\", \"C2\", \"C3\", \"C4\"),
#                        V2 = c(\"C1\", \"C2\", \"C2\", \"C3\", \"C4\"),
#                        value = c(16, 7, 12, 8, 9)), row.names = c(NA, -5L),
#                        header = \"4 x 4 sparse Matrix of class \\\"dgCMatrix\\\", with 5 entries\",
#                        class = \"data.frame\")"

   obj = "structure(list(V1 = c(\"C1\", \"C1\", \"C2\", \"C3\", \"C4\"),
                         V2 = c(\"C1\", \"C2\", \"C2\", \"C3\", \"C4\"),
                         value = c(16, 7, 12, 8, 9)), row.names = c(NA, -5L),
                         class = \"data.frame\")"

  expect_equal(spm_to_df(spm_cooc), eval(parse(text = obj)))
}
test_that('spm_to_df', test_spm_to_df())




if (FALSE) {
  # larger tests
  # devtools::load_all()

  #system.time({
  set.seed(1)
  n_rows = 2e3 * 4 * 12 / 2
  df_ehr = data.frame(Parent_Code = sample(paste0('C', 1:2e3), n_rows, replace = TRUE),
          Patient = sample(1:4, n_rows, replace = TRUE),
          Month = sample(1:12, n_rows, replace = TRUE),
          Count = sample(1:9, n_rows, replace = TRUE))
  df_ehr = subset(df_ehr, !duplicated(paste0('P', Patient, 'M', Month, 'C', Parent_Code)))

  df_cooc = build_df_cooc(df_ehr)

  #library(profvis)
  #p = profvis({df_cooc = build_df_cooc(df_ehr)})
  #htmlwidgets::saveWidget(p, file = 'profiling.html')
  #})

  # devtools::load_all()

  set.seed(1)
  n_rows = 2e3 * 4 * 12 / 2
  df_ehr = data.frame(Parent_Code = sample(paste0('C', 1:2e3), n_rows, replace = TRUE),
          Patient = sample(1:4, n_rows, replace = TRUE),
          Month = sample(1:12, n_rows, replace = TRUE),
          Count = sample(1:9, n_rows, replace = TRUE))
  df_ehr = subset(df_ehr, !duplicated(paste0('P', Patient, 'M', Month, 'C', Parent_Code)))

  system.time({
  df_cooc = build_df_cooc(df_ehr)
  })
  df_cooc[1:5, 1:5]
  df_cooc = build_df_cooc(df_ehr,n_cores = 4)

  # 1 patient

  set.seed(1)
  n_rows = 2e3 * 4 * 12 / 2
  df_ehr = data.frame(Parent_Code = sample(paste0('C', 1:2e3), n_rows, replace = TRUE),
          Patient = sample(1, n_rows, replace = TRUE),
          Count = sample(1:9, n_rows, replace = TRUE),
          Month = sample(1:12, n_rows, replace = TRUE))

  df_cooc = build_df_cooc(df_ehr)
  df_cooc[1:5, 1:5]

  # 1 month

  set.seed(1)
  n_rows = 2e3 * 4 * 12 / 2
  df_ehr = data.frame(Parent_Code = sample(paste0('C', 1:2e3), n_rows, replace = TRUE),
          Patient = sample(1, n_rows, replace = TRUE),
          Count = sample(1:9, n_rows, replace = TRUE),
          Month = sample(1, n_rows, replace = TRUE))

  df_cooc = build_df_cooc(df_ehr)
  df_cooc[1:5, 1:5]

}

Try the nlpembeds package in your browser

Any scripts or data that you put into this service are public.

nlpembeds documentation built on April 4, 2025, 4:41 a.m.