# Nothing
## ---- include = FALSE---------------------------------------------------------
# Set the knitr chunk options `collapse` and `comment` for all following
# chunks; printed output lines are prefixed with "#>".
knitr::opts_chunk$set(collapse = TRUE, comment = "#>")
## -----------------------------------------------------------------------------
library(dplyr)
library(magrittr)
library(msSPChelpR)
# Load the synthetic dataset of patients with cancer that is used to
# demonstrate the package functions.
data("us_second_cancer")
# This dataset is in long format, so each tumor is a separate row in the data
# (a patient with several tumors therefore occupies several rows).
us_second_cancer
## -----------------------------------------------------------------------------
# Restrict the data to patients with any lung cancer diagnosis.
#
# Step 1: collect the IDs of all rows whose tumor site is lung (ICD "C34").
# `pull()` extracts the ID column directly as a plain vector, replacing the
# former select() / as.vector() / unname() / unlist() chain. A patient with
# several lung tumors appears more than once, which is harmless for the
# `%in%` lookup below.
ids <- us_second_cancer %>%
  filter(t_site_icd == "C34") %>%
  pull(fake_id)

# Step 2: keep every tumor row (lung or not) of the patients detected above,
# sorted by patient ID.
filtered_usdata <- us_second_cancer %>%
  filter(fake_id %in% ids) %>%
  arrange(fake_id)

filtered_usdata
## -----------------------------------------------------------------------------
# Assign a time ID per case (case ID column: fake_id) and store it in the new
# column t_tumid -- presumably the tumor sequence number within a patient;
# confirm against the renumber_time_id() documentation.
renumbered_usdata <- filtered_usdata %>%
  renumber_time_id(
    case_id_var = "fake_id",
    new_time_id_var = "t_tumid",
    dattype = "seer"
  )

# Show the key columns together with the newly created time ID.
renumbered_usdata %>%
  select(fake_id, sex, t_site_icd, t_datediag, t_tumid)
## -----------------------------------------------------------------------------
# Reshape from long to wide format, as required by many package functions.
# In the wide format each case is one row, and the tumors of a case ID are
# added as new columns that use the time ID as column name suffix.
usdata_wide <- renumbered_usdata %>%
  reshape_wide_tidyr(
    case_id_var = "fake_id",
    time_id_var = "t_tumid",
    timevar_max = 10
  )

usdata_wide
## -----------------------------------------------------------------------------
# Flag for each patient whether a second primary cancer (SPC) is recorded.
# In the wide data a patient has an SPC exactly when the site of the second
# tumor (t_site_icd.2) is not missing.
usdata_wide <- usdata_wide %>%
  dplyr::mutate(
    # Character status variable.
    p_spc = dplyr::case_when(
      is.na(t_site_icd.2) ~ "No SPC",
      !is.na(t_site_icd.2) ~ "SPC developed",
      TRUE ~ NA_character_
    ),
    # The same information as a numeric indicator: 1 = SPC developed,
    # 0 = no SPC. The indicator was previously inverted (1 for patients
    # WITHOUT a second tumor), which contradicted p_spc and made every
    # SPC-free patient count as an observed case when count_spc is used as
    # the count variable of the SIR calculation.
    count_spc = dplyr::case_when(
      !is.na(t_site_icd.2) ~ 1,
      TRUE ~ 0
    )
  )

# Check the new variables next to the tumor information they are based on.
usdata_wide %>%
  dplyr::select(
    fake_id, sex.1, p_spc, count_spc,
    t_site_icd.1, t_datediag.1,
    t_site_icd.2, t_datediag.2
  )
## -----------------------------------------------------------------------------
# Determine the patient status at the end of follow-up (2017-12-31) and store
# it in the new column p_status. The vital status, SPC status and the
# relevant dates are taken from the columns named below.
usdata_wide <- usdata_wide %>%
  pat_status(
    fu_end = "2017-12-31",
    dattype = "seer",
    status_var = "p_status",
    life_var = "p_alive.1",
    spc_var = "p_spc",
    birthdat_var = "datebirth.1",
    lifedat_var = "datedeath.1",
    fcdat_var = "t_datediag.1",
    spcdat_var = "t_datediag.2",
    life_stat_alive = "Alive",
    life_stat_dead = "Dead",
    spc_stat_yes = "SPC developed",
    spc_stat_no = "No SPC",
    lifedat_fu_end = "2019-12-31",
    use_lifedatmin = FALSE,
    check = TRUE,
    as_labelled_factor = TRUE
  )

# Show the new status next to the variables it was derived from.
usdata_wide %>%
  dplyr::select(
    fake_id, p_status, p_alive.1, datedeath.1,
    t_site_icd.1, t_datediag.1,
    t_site_icd.2, t_datediag.2
  )

# Alternatively, you can impute the date of death using lifedatmin_var.
# The result of this call is only printed, not assigned.
usdata_wide %>%
  pat_status(
    fu_end = "2017-12-31",
    dattype = "seer",
    status_var = "p_status",
    life_var = "p_alive.1",
    spc_var = "p_spc",
    birthdat_var = "datebirth.1",
    lifedat_var = "datedeath.1",
    fcdat_var = "t_datediag.1",
    spcdat_var = "t_datediag.2",
    life_stat_alive = "Alive",
    life_stat_dead = "Dead",
    spc_stat_yes = "SPC developed",
    spc_stat_no = "No SPC",
    lifedat_fu_end = "2019-12-31",
    use_lifedatmin = TRUE,
    lifedatmin_var = "p_dodmin.1",
    check = TRUE,
    as_labelled_factor = TRUE
  )
## -----------------------------------------------------------------------------
# Drop patients whose status is one of the three "NA - ..." categories listed
# below (not born, no cancer before end of FU, or missing date of death).
usdata_wide <- usdata_wide %>%
  dplyr::filter(
    !(p_status %in% c(
      "NA - Patient not born before end of FU",
      "NA - Patient did not develop cancer before end of FU",
      "NA - Patient date of death is missing"
    ))
  )

# Tabulate the remaining status categories.
usdata_wide %>%
  dplyr::count(p_status)
## -----------------------------------------------------------------------------
# Calculate the follow-up time in years up to the end of follow-up
# (2017-12-31) and store it in the new column p_futimeyrs.
usdata_wide <- usdata_wide %>%
  calc_futime(
    futime_var_new = "p_futimeyrs",
    fu_end = "2017-12-31",
    dattype = "seer",
    time_unit = "years",
    lifedat_var = "datedeath.1",
    fcdat_var = "t_datediag.1",
    spcdat_var = "t_datediag.2"
  )

# Show the follow-up time next to the status and date variables.
usdata_wide %>%
  dplyr::select(
    fake_id, p_status, p_futimeyrs, p_alive.1,
    datedeath.1, t_datediag.1, t_datediag.2
  )
## -----------------------------------------------------------------------------
# Calculate SIR results by follow-up time interval, using count_spc as the
# count variable and us_refrates_icd2 as the reference rates.
sircalc_results <- usdata_wide %>%
  sir_byfutime(
    dattype = "seer",
    # stratification variables
    ybreak_vars = c("race.1", "t_dco.1"),
    xbreak_var = "none",
    # follow-up intervals in years: 0-1 month, 1-2 months, 2-12 months,
    # 1-5 years, 5-10 years, 10+ years
    futime_breaks = c(0, 1 / 12, 2 / 12, 1, 5, 10, Inf),
    count_var = "count_spc",
    refrates_df = us_refrates_icd2,
    calc_total_row = TRUE,
    calc_total_fu = TRUE,
    # columns that hold the strata information
    region_var = "registry.1",
    age_var = "fc_agegroup.1",
    sex_var = "sex.1",
    year_var = "t_yeardiag.1",
    race_var = "race.1",
    # using grouping by second cancer incidence
    # NOTE(review): the column passed here carries the ".1" suffix, while the
    # comment above speaks of the second cancer -- confirm which is intended.
    site_var = "t_site_icd.1",
    futime_var = "p_futimeyrs",
    alpha = 0.05
  )

# Print up to 100 rows of the result.
print(sircalc_results, n = 100)
## -----------------------------------------------------------------------------
# The summarize function is versatile. Here, for example, the results are
# summarized across region, age, year and race (and across sites, since
# summarize_site = TRUE).
sircalc_results %>%
  summarize_sir_results(
    summarize_groups = c("region", "age", "year", "race"),
    summarize_site = TRUE,
    output = "long",
    output_information = "minimal",
    add_total_row = "only",
    add_total_fu = "no",
    collapse_ci = FALSE,
    shorten_total_cols = TRUE,
    fubreak_var_name = "fu_time",
    ybreak_var_name = "yvar_name",
    xbreak_var_name = "none",
    site_var_name = "t_site",
    alpha = 0.05
  ) %>%
  # drop the grouping columns from the printed result
  dplyr::select(-region, -age, -year, -race, -sex, -yvar_name)
## -----------------------------------------------------------------------------
# Print the R session information for this run.
sessionInfo()
# Any scripts or data that you put into this service are public.
# Add the following code to your website.
# For more information on customizing the embed code, read Embedding Snippets.