Nothing
# Shared toy EHR fixture, one row per (month, patient, code) record:
#   month 1: patients 1 and 2, codes C1 and C2 (C2 appears twice)
#   month 2: patients 1 and 2, a single code (C1)
#   month 3: patient 1 only, codes C1 and C2
#   month 4: patients 3 and 4, codes C3 and C4
# devtools::load_all()
df_ehr = data.frame(
  Month       = c(1, 1, 1, 2, 2, 3, 3, 4, 4),
  Patient     = c(1, 1, 2, 1, 2, 1, 1, 3, 4),
  Parent_Code = paste0('C', c(1, 2, 2, 1, 1, 1, 2, 3, 4)),
  Count       = seq_len(9)
)

# Matrix built once from the fixture and reused by the tests below.
spm_cooc = build_df_cooc(df_ehr)
# Exercises build_df_cooc() on the shared toy fixture and on batched inputs
# that are built against a common code vocabulary (`uniq_codes`).
#
# Takes no arguments and reads the file-level `spm_cooc` object. The value of
# the last build_df_cooc() call is returned but is not inspected by the caller.
test_build_df_cooc = function() {

  expect_true(inherits(spm_cooc, 'sparseMatrix'))

  # in diagonal, no NAs, no 0s
  code_freqs = Matrix::diag(spm_cooc)
  expect_true(!any(is.na(code_freqs)))
  expect_true(!any(code_freqs == 0))

  # in coocs, none above the diag: no entry of a column may exceed that
  # column's diagonal value. vapply() pins the result to exactly one logical
  # per column, so the check stays type-stable (sapply() would hand back a
  # list for an empty diagonal and make any() fail).
  above_diag = vapply(seq_along(code_freqs), function(idx) {
      any(spm_cooc[, idx] > code_freqs[idx])
    }, logical(1))
  expect_true(!any(above_diag))

  # try with uniq_codes
  # Every batch shares the same Month and Count columns; only the patients
  # and the codes differ, so all of them go through one local builder.
  make_batch = function(patients, codes) {
    data.frame(Month = c(1, 1, 1, 2, 2, 3, 3, 4, 4),
               Patient = patients,
               Parent_Code = codes,
               Count = 1:9)
  }
  patients_1_to_4 = c(1, 1, 2, 1, 2, 1, 1, 3, 4)
  patients_5_to_8 = c(5, 5, 6, 5, 6, 5, 5, 7, 8)

  df_ehr_batch1 = make_batch(
    patients_1_to_4,
    c('C1', 'C2', 'C2', 'C1', 'C1', 'C1', 'C2', 'C3', 'C4'))
  df_ehr_batch2 = make_batch(
    patients_5_to_8,
    c('C5', 'C6', 'C6', 'C5', 'C5', 'C1', 'C2', 'C3', 'C4'))
  df_ehr_batch3 = make_batch(
    patients_5_to_8,
    c('C5', 'C6', 'C6', 'C5', 'C5', 'C1', 'C2', 'C7', 'C8'))

  uniq_codes = paste0('C', 1:8)

  # Accumulate the per-batch matrices. The third sum is deliberately not
  # stored: the call itself is what is being exercised.
  spm_cooc_batch = build_df_cooc(df_ehr_batch1, uniq_codes, min_code_freq = 0)
  spm_cooc_batch = spm_cooc_batch +
    build_df_cooc(df_ehr_batch2, uniq_codes, min_code_freq = 0)
  spm_cooc_batch + build_df_cooc(df_ehr_batch3, uniq_codes, min_code_freq = 0)
  # there's your error
  # NOTE(review): presumably a reproduction of a past failure; the test only
  # passes if the calls above and below run without raising.

  # also just with
  build_df_cooc(df_ehr_batch3, uniq_codes)

  # only affects code freqs, not code coocs
  df_ehr_batch1 = make_batch(
    patients_1_to_4,
    c('C7', 'C8', 'C8', 'C7', 'C7', 'C7', 'C8', 'C3', 'C4'))

  build_df_cooc(df_ehr_batch1, uniq_codes)
}
test_that('build_df_cooc', test_build_df_cooc())
# Runs build_df_cooc() with n_cores = 3 on the data stored in
# 'df_ehr_10_patients_cuis.rds' and checks that the diagonal of the result
# contains neither NAs nor zeros. Takes no arguments.
test_build_df_cooc_cuis = function() {

  # with cuis
  # devtools::load_all()
  # Reference printout kept from the original author
  # (presumably a corner of the expected result -- TODO confirm):
  #5 x 5 sparse Matrix of class "dgCMatrix"
  #           BACITRACIN C0000726 C0000729 C0000731 C0000737
  #BACITRACIN         15        8        1        1        2
  #C0000726            .      707       16       27       79
  #C0000729            .        .       19        3        5
  #C0000731            .        .        .       31       10
  #C0000737            .        .        .        .      110

  # load() returns the name of the restored object; get() fetches it.
  df_ehr = get(load('df_ehr_10_patients_cuis.rds'))

  #library(profvis)
  #p = profvis({ df_cooc = build_df_cooc(df_ehr) })
  #htmlwidgets::saveWidget(p, file = 'profiling.html')

  # The timing itself is discarded; the assignment happens in this function's
  # frame, so the result stays available below.
  system.time(cooc_cuis <- build_df_cooc(df_ehr, n_cores = 3))

  diag_counts = Matrix::diag(cooc_cuis)
  expect_false(any(is.na(diag_counts)))
  expect_false(any(diag_counts == 0))
}
#test_that('build_df_cooc_cuis', test_build_df_cooc_cuis())
# Checks that spm_to_df() applied to the file-level `spm_cooc` matrix yields
# the expected three-column data frame (V1, V2, value). Takes no arguments.
test_spm_to_df = function() {

  # An earlier expected value additionally carried a `header` attribute; it is
  # kept here for reference only:
  # obj = "structure(list(V1 = c(\"C1\", \"C1\", \"C2\", \"C3\", \"C4\"),
  #                       V2 = c(\"C1\", \"C2\", \"C2\", \"C3\", \"C4\"),
  #       value = c(16, 7, 12, 8, 9)), row.names = c(NA, -5L),
  #  header = \"4 x 4 sparse Matrix of class \\\"dgCMatrix\\\", with 5 entries\",
  #  class = \"data.frame\")"

  # Written as ordinary code instead of eval(parse(text = ...)) on a string:
  # the resulting object is the same, but it is checked by the parser and
  # readable without un-escaping quotes.
  expected = structure(
    list(V1 = c('C1', 'C1', 'C2', 'C3', 'C4'),
         V2 = c('C1', 'C2', 'C2', 'C3', 'C4'),
         value = c(16, 7, 12, 8, 9)),
    row.names = c(NA, -5L),
    class = 'data.frame')

  expect_equal(spm_to_df(spm_cooc), expected)
}
test_that('spm_to_df', test_spm_to_df())
if (FALSE) {

  # larger tests
  # Manual runs only: this branch is never executed when the file is sourced.
  # Each section re-seeds the RNG and rebuilds its own data so it can be
  # pasted into a console on its own.
  # n_rows = 2e3 codes * 4 patients * 12 months / 2

  # devtools::load_all()

  #system.time({
  set.seed(1)
  n_rows = 2e3 * 4 * 12 / 2
  df_ehr = data.frame(
    Parent_Code = sample(paste0('C', seq_len(2e3)), n_rows, replace = TRUE),
    Patient = sample(seq_len(4), n_rows, replace = TRUE),
    Month = sample(seq_len(12), n_rows, replace = TRUE),
    Count = sample(seq_len(9), n_rows, replace = TRUE)
  )
  # keep only the first row of every (patient, month, code) combination
  df_ehr = df_ehr[!duplicated(with(df_ehr,
    paste0('P', Patient, 'M', Month, 'C', Parent_Code))), ]

  df_cooc = build_df_cooc(df_ehr)
  #library(profvis)
  #p = profvis({df_cooc = build_df_cooc(df_ehr)})
  #htmlwidgets::saveWidget(p, file = 'profiling.html')
  #})

  # devtools::load_all()

  # same data again, timed, then with 4 cores
  set.seed(1)
  n_rows = 2e3 * 4 * 12 / 2
  df_ehr = data.frame(
    Parent_Code = sample(paste0('C', seq_len(2e3)), n_rows, replace = TRUE),
    Patient = sample(seq_len(4), n_rows, replace = TRUE),
    Month = sample(seq_len(12), n_rows, replace = TRUE),
    Count = sample(seq_len(9), n_rows, replace = TRUE)
  )
  df_ehr = df_ehr[!duplicated(with(df_ehr,
    paste0('P', Patient, 'M', Month, 'C', Parent_Code))), ]

  system.time({
    df_cooc = build_df_cooc(df_ehr)
  })
  df_cooc[seq_len(5), seq_len(5)]

  df_cooc = build_df_cooc(df_ehr, n_cores = 4)

  # 1 patient
  set.seed(1)
  n_rows = 2e3 * 4 * 12 / 2
  df_ehr = data.frame(
    Parent_Code = sample(paste0('C', seq_len(2e3)), n_rows, replace = TRUE),
    Patient = sample(1, n_rows, replace = TRUE),
    Count = sample(seq_len(9), n_rows, replace = TRUE),
    Month = sample(seq_len(12), n_rows, replace = TRUE)
  )
  df_cooc = build_df_cooc(df_ehr)
  df_cooc[seq_len(5), seq_len(5)]

  # 1 month
  set.seed(1)
  n_rows = 2e3 * 4 * 12 / 2
  df_ehr = data.frame(
    Parent_Code = sample(paste0('C', seq_len(2e3)), n_rows, replace = TRUE),
    Patient = sample(1, n_rows, replace = TRUE),
    Count = sample(seq_len(9), n_rows, replace = TRUE),
    Month = sample(1, n_rows, replace = TRUE)
  )
  df_cooc = build_df_cooc(df_ehr)
  df_cooc[seq_len(5), seq_len(5)]
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.