# !diagnostics off
getCases <- function(outcome,cpsCode,icd9Code,icd10Code,rep_code,inccanv=NULL,sitec=NULL,
initialPath=file.path("s:/cps")){
require(dplyr)
message("getCases() was updated on July 30th 2019 and includes followup through June 2015. Please update the function if followup is extended")
# Pull the date of interview variables for a censoring cohort - include prevalent cancer variable
foo <- rbind(variables$men,variables$women)
foo$FullPath <- file.path(initialPath,foo$subPath,foo$File)
dtint <- dplyr::filter(foo,substr(Variables,1,5)=="DTINT")
files <- dtint$FullPath
keep <- c("ID",dtint$Variables,"CA92","CA82")
rm(foo)
men <- lapply(dtint$FullPath[dtint$Sex=="MEN"],function(x){
foo <- readRDS(x)
foo <- foo[,names(foo) %in% keep]
}) %>% Reduce(function(x,y) full_join(x,y,"ID"),.)
men$SEX <- "MEN"
women <- lapply(dtint$FullPath[dtint$Sex=="WOMEN"],function(x){
foo <- readRDS(x)
foo <- foo[,names(foo) %in% keep]
}) %>% Reduce(function(x,y) full_join(x,y,"ID"),.)
women$SEX <- "WOMEN"
dtint <- dplyr::bind_rows(men,women)
rm(keep,men,women)
# Master and person-year files
master <- rbind(readRDS(file.path(initialPath,"master/cps2smgmst17f.rds")),
readRDS(file.path(initialPath,"master/cps2smgmst17m.rds")))
master$MASTER <- 1
pyear <- rbind(readRDS(file.path(initialPath,"nutrition/pyear/nut92fempyr1216.rds")),
readRDS(file.path(initialPath,"nutrition/pyear/nut92menpyr1216.rds")))
# Mortality files - input codes included in function call
mortality <- readRDS(file.path(initialPath,"mortality/smgmrt17d190411.rds"))
mortality$DEAD <- 1
# Reported files
path <- file.path(initialPath, "nutrition/reported")
# Reported
report15 <- readRDS(file.path(path,"cancer15apr2019reported.rds"))
report15 <- reportedFun(report15,"15",rep_code)
report13 <- readRDS(file.path(path,"cancer13aug2018reported.rds"))
report13 <- reportedFun(report13,"13",rep_code)
report11 <- readRDS(file.path(path,"cancer11aug2018reported.rds"))
report11 <- reportedFun(report11,"11",rep_code)
report09 <- readRDS(file.path(path,"cancer09aug2018reported.rds"))
report09 <- reportedFun(report09,"09",rep_code)
report07 <- readRDS(file.path(path,"cancer07aug2018reported.rds"))
report07 <- reportedFun(report07,"07",rep_code)
report05 <- readRDS(file.path(path,"cancer05aug2018reported.rds"))
report05 <- reportedFun(report05,"05",rep_code)
report03 <- readRDS(file.path(path,"cancer03mar2011reported.rds"))
report03 <- reportedFun(report03,"03",rep_code)
report01 <- readRDS(file.path(path,"cancer01mar2011reported.rds"))
report01 <- reportedFun(report01,"01",rep_code)
report99 <- readRDS(file.path(path,"cancer99sep2007reported.rds"))
report99 <- reportedFun(report99,"99",rep_code)
report97 <- readRDS(file.path(path,"cancer97feb2015reported.rds"))
report97 <- reportedFun(report97,"97",rep_code)
reported <- Reduce(function(x,y) full_join(x,y,"ID"),
list(report97,report99,report01,report03,report05,
report07,report09,report11,report13,report15))
reported <- reported[!duplicated(reported$ID),]
rm(report97,report99,report01,report03,report05,
report07,report09,report11,report13,report15)
# Verified file
verified <- readRDS(file.path(initialPath,"nutrition/verified/mar2019verified.rds"))
# Recode diagnosis date for everyone
verified$DODDY[verified$DODDY=="99"] <- "15"
verified$DODMO[verified$DODMO=="99"] <- "6"
verified$DODDY[verified$DODDY > "30" & verified$DODMO %in% c("04","06","09","11")] <- "30"
verified$DODDY[verified$DODDY > "28" & verified$DODMO==2] <- "28"
verified$DXDATE <- as.Date(paste(verified$DODYR, verified$DODMO, verified$DODDY, sep="-"),
format="%Y-%m-%d")
# Select only first diagnoses
verified <- filter(verified[order(verified$DXDATE),],!duplicated(ID) &
DXDATE < as.Date("2015-06-30"))
# Selected cancer and all other cancers diagnosis
verified$VERFCANCER <- with(verified,ifelse(
INCCANV %in% inccanv | SITEC %in% sitec,1,0))
verified$VERFOTHER <- ifelse(verified$VERFCANCER==1,0,1)
# Drop SEX because the variable messes up my merges
verified <- dplyr::select(verified,-SEX)
# I want to keep all the variables in the verified file
verifiedNames <- names(verified)
verifiedNames <- verifiedNames[!verifiedNames %in% c("VERFCANCER","VERFOTHER","ID")]
# Rename othver variables and keep
ver <- select(verified[verified$VERFCANCER==1,],-VERFOTHER)
othver <- verified[verified$VERFOTHER==1,]
othver$OTHDXDATE <- othver$DXDATE
othver$OTHVERCAN <- othver$INCCANV
othver <- othver[,c("ID","OTHDXDATE","OTHVERCAN","VERFOTHER")]
cohort <- dplyr::filter(Reduce(function(x,y) full_join(x,y,"ID"),
list(dtint,ver,othver,master,mortality,pyear,reported)),
NUT_92==1 & MASTER==1)
names(cohort) <- tolower(names(cohort))
rm(dtint,master,mortality,pyear,reported,verified,othver,ver)
# Convert dates
dates <- c("dtint82","dtint92","dtint97","dtint99","dtint01","dtint03","dtint05",
"dtint07","dtint09","dtint11","dtint13","dtint15","bdaydate","datedd")
cohort[,dates] <- lapply(cohort[,dates],function(x) as.Date(x,origin="1960-01-01"))
# Hard code some missing data
na <- c("dead","verfcancer","verfother","report97","report99","report01","report03",
"report05","report07","report09","report11","report13","report15")
cohort[,na] <- lapply(cohort[,na],function(x) ifelse(is.na(x),0,x))
# Clean diagnosis dates
# Clean up the diagnosis dates
cohort$dxdate <- dxdateClean(cohort,"dxdate")
# End of followup
cohort$dead <- ifelse(cohort$dead==1 & cohort$datedd > "2015-06-30",0,cohort$dead)
cohort$dthdate <- ifelse(cohort$dead==1,cohort$datedd,NA)
# lost to followup
cohort$lostfu <- ifelse(apply(cohort[,c("nut_97","nut_99","nut_01","nut_03",
"nut_05","nut_07","nut_09","nut_11",
"nut_13","nut_15")],1,sum)==0,1,0)
cohort$lostfu[cohort$lostfu==1 & cohort$datedd <= "1997-12-31"] <- 0
# Fatal cases
# any cancer
cohort$anycancer <- with(cohort,ifelse(
(codetype=='1' & code1>='31' & code1 <='99') |
(codetype=='2' & code1>='140' & code1 <= '2099') |
(codetype=='2' & code1>='230' & code1 <= '2399') |
(codetype=='3' & code1>='C00' & code1 <= 'D099') |
(codetype=='3' & code1>='D37' & code1 <= 'D499'),1,0))
cohort$cancerdth <- with(cohort,ifelse(
(codetype=="1" & code1 %in% cpsCode) |
(codetype=="2" & code1 %in% icd9Code) |
(codetype=="3" & code1 %in% icd10Code),1,ifelse(
anycancer==0 &
(codetype=="1" & code2 %in% cpsCode) |
(codetype=="2" & code2 %in% icd9Code) |
(codetype=="3" & code2 %in% icd10Code),1,ifelse(
anycancer==0 &
(codetype=="1" & code3 %in% cpsCode) |
(codetype=="2" & code3 %in% icd9Code) |
(codetype=="3" & code3 %in% icd10Code),1,0))))
# some deaths occurred after the end of followup date
cohort$anycancer[cohort$dead==0] <- 0
cohort$cancerdth[cohort$dead==0] <- 0
# death only cases
cohort$cancerdied <- ifelse(cohort$verfcancer==0 & cohort$cancerdth==1,1,0)
# Incident case: verified + died (don't include self-only)
cohort$incidence <- ifelse(cohort$cancerdied==1 | cohort$verfcancer==1,1,0)
cohort$incidence[is.na(cohort$incidence)] <- 0
# Outcome definitions
report <- paste0("report",c("97","99","01","03","05","07","09","11","13","15"))
cohort$selfreport <- ifelse(apply(cohort[,report],1,sum) != 0,1,0)
# Flag the self-only cases
cohort$self_only <- ifelse(cohort$incidence==0 & cohort$selfreport==1,1,0)
# Censoring ---------------------------------------------------------------
# Censor criteria
# 1. Last survey returned
# 2. Date of any diagnosis/death
# 3. Reported cancer of interest
cohort$endfu <- as.Date("2015-06-30",origin="1970-01-01")
cohort$dateft <- apply(cohort[,c("datedd","endfu")],1,function(x){
min(as.Date(x))
})
cohort$dateft <- with(cohort,ifelse(dead==1, dthdate,dateft))
cohort$dateft <- with(cohort,ifelse(verfcancer==1 & !is.na(dxdate),dxdate,dateft))
cohort$dateft <- with(cohort,ifelse(verfother==1, othdxdate,dateft))
cohort$dateft <- as.Date(cohort$dateft,origin="1970-01-01")
# 2013
cohort$cen13 <- 0
cohort$cen13 <- with(cohort,ifelse(
((!is.na(dtint13) & dateft>dtint13) | (is.na(dtint13) & dateft>=as.Date("2013-10-31"))) &
(nut_13==1 & nut_15==0) &
((dead==0) | (dead==1 & datedd>as.Date("2015-06-30"))), 1, 0))
cohort$cen13[cohort$report13==1 & cohort$verfcancer==0 & cohort$cancerdied==0] <- 1 # censor on reported
cohort$change13 <- ifelse(cohort$cen13 == 1,1,0)
cohort$dateft[cohort$change13==1] <- cohort$dtint13[cohort$change13==1]
cohort$dateft[cohort$change13==1 & is.na(cohort$dtint13)] <- as.Date("2013-10-31")
# 2011
cohort$cen11 <- 0
cohort$cen11 <- with(cohort,ifelse(
((!is.na(dtint11) & dateft>dtint11) | (is.na(dtint11) & dateft>=as.Date("2011-10-31"))) &
(nut_11==1 & nut_13==0 & nut_15==0) &
((dead==0) | (dead==1 & datedd>as.Date("2013-10-31"))), 1, 0))
cohort$cen11[cohort$report11==1 & cohort$verfcancer==0 & cohort$cancerdied==0] <- 1 # censor on reported
cohort$change11 <- ifelse(cohort$cen11 == 1,1,0)
cohort$dateft[cohort$change11==1] <- cohort$dtint11[cohort$change11==1]
cohort$dateft[cohort$change11==1 & is.na(cohort$dtint11)] <- as.Date("2011-10-31")
# 2009
cohort$cen09 <- 0
cohort$cen09 <- with(cohort,ifelse(
((!is.na(dtint09) & dateft>dtint09) | (is.na(dtint09) & dateft>=as.Date("2009-10-31"))) &
(nut_09==1 & nut_11==0 & nut_13==0 & nut_15==0) &
((dead==0) | (dead==1 & datedd>as.Date("2011-10-31"))), 1, 0))
cohort$cen09[cohort$report09==1 & cohort$verfcancer==0 & cohort$cancerdied==0] <- 1 # censor on reported
cohort$change09 <- ifelse(cohort$cen09 == 1,1,0)
cohort$dateft[cohort$change09==1] <- cohort$dtint09[cohort$change09==1]
cohort$dateft[cohort$change09==1 & is.na(cohort$dtint09)] <- as.Date("2009-10-31")
# 2007
cohort$cen07 <- 0
cohort$cen07 <- with(cohort,ifelse(
((!is.na(dtint07) & dateft>dtint07) | (is.na(dtint07) & dateft>=as.Date("2007-10-31"))) &
(nut_07==1 & nut_09==0 & nut_11==0 & nut_13==0 & nut_15==0) &
((dead==0) | (dead==1 & datedd>as.Date("2009-10-31"))), 1, 0))
cohort$cen07[cohort$report07==1 & cohort$verfcancer==0 & cohort$cancerdied==0] <- 1 # censor on reported
cohort$change07 <- ifelse(cohort$cen07 == 1,1,0)
cohort$dateft[cohort$change07==1] <- cohort$dtint07[cohort$change07==1]
cohort$dateft[cohort$change07==1 & is.na(cohort$dtint07)] <- as.Date("2007-10-31")
# 2005
cohort$cen05 <- 0
cohort$cen05 <- with(cohort,ifelse(
((!is.na(dtint05) & dateft>dtint05) | (is.na(dtint05) & dateft>=as.Date("2005-10-31"))) &
(nut_05==1 & nut_07==0 & nut_09==0 & nut_11==0 & nut_13==0 & nut_15==0) &
((dead==0) | (dead==1 & datedd>as.Date("2007-10-31"))), 1, 0))
cohort$cen05[cohort$report05==1 & cohort$verfcancer==0 & cohort$cancerdied==0] <- 1 # censor on reported
cohort$change05 <- ifelse(cohort$cen05 == 1,1,0)
cohort$dateft[cohort$change05==1] <- cohort$dtint05[cohort$change05==1]
cohort$dateft[cohort$change05==1 & is.na(cohort$dtint05)] <- as.Date("2005-10-31")
# 2003
cohort$cen03 <- 0
cohort$cen03 <- with(cohort,ifelse(
((!is.na(dtint03) & dateft>dtint03) | (is.na(dtint03) & dateft>=as.Date("2003-10-31"))) &
(nut_03==1 & nut_05==0 & nut_07==0 & nut_09==0 & nut_11==0 & nut_13==0 & nut_15==0) &
((dead==0) | (dead==1 & datedd>as.Date("2005-10-31"))), 1, 0))
cohort$cen03[cohort$report03==1 & cohort$verfcancer==0 & cohort$cancerdied==0] <- 1 # censor on reported
cohort$change03 <- ifelse(cohort$cen03 == 1,1,0)
cohort$dateft[cohort$change03==1] <- cohort$dtint03[cohort$change03==1]
cohort$dateft[cohort$change03==1 & is.na(cohort$dtint03)] <- as.Date("2003-10-31")
# 2001
cohort$cen01 <- 0
cohort$cen01 <- with(cohort,ifelse(
((!is.na(dtint01) & dateft>dtint01) | (is.na(dtint01) & dateft>=as.Date("2001-12-31"))) &
(nut_01==1 & nut_03==0 & nut_05==0 & nut_07==0 & nut_09==0 & nut_11==0 & nut_13==0 & nut_15==0) &
((dead==0) | (dead==1 & datedd>as.Date("2003-10-31"))), 1, 0))
cohort$cen01[cohort$report01==1 & cohort$verfcancer==0 & cohort$cancerdied==0] <- 1 # censor on reported
cohort$change01 <- ifelse(cohort$cen01 == 1,1,0)
cohort$dateft[cohort$change01==1] <- cohort$dtint01[cohort$change01==1]
cohort$dateft[cohort$change01==1 & is.na(cohort$dtint01)] <- as.Date("2001-10-31")
# 1999
cohort$cen99 <- 0
cohort$cen99 <- with(cohort,ifelse(
((!is.na(dtint99) & dateft>dtint99) | (is.na(dtint99) & dateft>=as.Date("1999-12-31"))) &
(nut_99==1 & nut_01==0 & nut_03==0 & nut_05==0 & nut_07==0 & nut_09==0 &
nut_11==0 & nut_13==0 & nut_15==0) &
((dead==0) | (dead==1 & datedd>as.Date("2001-12-31"))), 1, 0))
cohort$cen99[cohort$report99==1 & cohort$verfcancer==0 & cohort$cancerdied==0] <- 1 # censor on reported
cohort$change99 <- ifelse(cohort$cen99 == 1,1,0)
cohort$dateft[cohort$change99==1] <- cohort$dtint99[cohort$change99==1]
cohort$dateft[cohort$change99==1 & is.na(cohort$dtint99)] <- as.Date("1999-10-31")
# 1997
cohort$cen97 <- 0
cohort$cen97 <- with(cohort,ifelse(
((!is.na(dtint97) & dateft>dtint97) | (is.na(dtint97) & dateft>=as.Date("1997-12-31"))) &
(nut_97==1 & nut_99==0 & nut_01==0 & nut_03==0 & nut_05==0 & nut_07==0 & nut_09==0 &
nut_11==0 & nut_13==0 & nut_15==0) &
((dead==0) | (dead==1 & datedd>as.Date("1999-12-31"))), 1, 0))
cohort$cen97[cohort$report97==1 & cohort$verfcancer==0 & cohort$cancerdied==0] <- 1 # censor on reported
cohort$change97 <- ifelse(cohort$cen97 == 1,1,0)
cohort$dateft[cohort$change97==1] <- cohort$dtint97[cohort$change97==1]
cohort$dateft[cohort$change97==1 & is.na(cohort$dtint97)] <- as.Date("1997-10-31")
cohort$changeft <- ifelse(apply(cohort[,c("change97","change99","change01","change03",
"change05","change07","change09","change11",
"change13")],1,sum)>0,1,0)
# flag prevalent cancers
cohort[,c("prev82","prev92")] <- 0
cohort$prev82[!cohort$ca82 %in% c("N","73")] <- 1
cohort$prev92[cohort$ca92 != 0] <- 1
cohort$prevcancer <- with(cohort,ifelse(prev92==1 | prev82==1,1,0))
# Change case value for censored cases
cohort$incidence[cohort$incidence==1 & cohort$changeft==1] <- 0
cohort[cohort$incidence==0,c("cancerdth","cancerdied","cancerdth2","selfreport","verfcancer")] <- 0
# Change some variable names for my outcomes to make them more descriptive
cohort[,paste0(outcome,"_","FINAL")] <- cohort$incidence
cohort[,paste0(outcome,"_","DEATH")] <- cohort$cancerdth
cohort[,paste0(outcome,"_","DIED")] <- cohort$cancerdied
cohort[,paste0(outcome,"_","SELF")] <- cohort$selfreport
cohort[,paste0(outcome,"_","VERIFIED")] <- cohort$verfcancer
renames <- paste0(outcome,"_",c("FINAL","DEATH","DIED","SELF","VERIFIED"))
# Only want to return some of the variables
keep <- c("sex","code1","code2","code3","dead","dthdate","lostfu","anycancer",
"dateft","prev82","prev92","prevcancer","self_only","othdxdate","verfother",
"datedd")
names(cohort) <- toupper(names(cohort))
cohort <- cohort[,toupper(c("id",verifiedNames,keep,renames))]
return(cohort)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.