library(dplyr)
fcds:::requires_package(c("here", "readxl", "readr"), "fcds_import-clean.R")
library(here)
library(readr)
library(readxl)
# Raw FCDS STAT 2018 extract. Column types are left to read_csv()'s guessing.
dat <- read_csv(here("data-raw", "STAT_dataset_2018.dat"))

# ICD-O-3 lookup: keep the first row seen for each `Histology/Behavior` code
# and only the code + description columns, then store it under data/.
icd <- read_excel(here("data-raw", "sitetype.icdo3.20180323.xls")) %>%
  distinct(`Histology/Behavior`, .keep_all = TRUE) %>%
  select(`Histology/Behavior`, `Histology/Behavior Description`)
saveRDS(icd, here("data", "icd.rds"))
# Recode the raw STAT 2018 columns into human-readable labels ----
#
# Every coded column in `dat` is translated into a labelled column with a
# snake_case name. The result keeps only `patient_id` plus the new columns
# (`dx_year` through `morphology`) and is written to
# data/stat_dataset_2018_clean.rds.

# Code tables shared by more than one raw column. Each table is defined once
# and spliced into recode() with `!!!`, so the parallel columns that use it
# always receive exactly the same labels.

# State-level recode, used for address at diagnosis and birthplace.
state_labels <- c(
  "00" = "Florida",
  "01" = "Other US States and Territories",
  "02" = "Not Applicable"
)

# Country-level recode, used for address at diagnosis and birthplace.
country_labels <- c(
  "01" = "US States and Territories",
  "02" = "Other Countries",
  "99" = "Unknown"
)

# Although the codes are identical for SEER Summary Stage 1977 and 2000, the
# methodology for coding these variables is distinct. Please refer to the SEER
# Summary Staging Guide and the SEER Summary Staging Manual 2000.
# The label for code 8 is taken from
# https://seer.cancer.gov/tools/ssm/SSM2018-General-Instructions.pdf
# because it does not appear in any of the dataset documentation.
seer_stage_labels <- c(
  "0" = "In Situ",
  "1" = "Local",
  "2" = "Regional/Direct extension",
  "3" = "Regional/Nodes only",
  "4" = "Regional/Direct extension & nodes",
  "5" = "Regional NOS",
  "7" = "Distant/Systemic Disease",
  "8" = "Benign/Borderline",
  "9" = "Unknown"
)

# How a derived SEER Summary Stage value was obtained (1977 and 2000 fields).
seer_stage_source_labels <- c(
  "1" = "Derived from Collaborative Stage",
  "2" = "Derived from EOD"
)

# Tobacco-use status, shared by the cigarette, other-smoke, smokeless and NOS
# fields.
tobacco_use_labels <- c(
  "0" = "Never used",
  "1" = "Current user",
  "2" = "Former user, quit within 1 year of Dx",
  "3" = "Former user, quit more than 1 year of Dx",
  "4" = "Former user, unknown when quit",
  "9" = "Unknown"
)

dat <- dat %>%
  mutate(
    # Five-year diagnosis window, e.g. "1115" -> "2011-2015".
    dx_year = recode(
      Date_of_Dx_Year_Recoded,
      "1115" = "2011-2015",
      "0610" = "2006-2010",
      "0105" = "2001-2005",
      "9600" = "1996-2000",
      "9195" = "1991-1995",
      "8690" = "1986-1990",
      "8185" = "1981-1985"),
    # Middle year of the same window, stored as a string.
    dx_year_mid = recode(
      Date_of_Dx_Year_Recoded,
      "1115" = "2013",
      "0610" = "2008",
      "0105" = "2003",
      "9600" = "1998",
      "9195" = "1993",
      "8690" = "1988",
      "8185" = "1983"),
    sex = recode(
      Sex_Recoded,
      `1` = "Male",
      `2` = "Female",
      `9` = "Unknown"),
    race = recode(
      Race_Recoded,
      `1` = "White",
      `2` = "Black",
      `3` = "Other",
      `9` = "Unknown"),
    hispanic = recode(
      Ethnicity_Recoded,
      `0` = "Not Hispanic",
      `8` = "Hispanic",
      `9` = "Unknown"),
    address = recode(Addr_at_DX_State_Recoded, !!!state_labels),
    birth_state = recode(Birthplace_State_Abrv_Recoded, !!!state_labels),
    dx_country = recode(Addr_at_Dx_Country_Recoded, !!!country_labels),
    birth_country = recode(Birthplace_Country_Recoded, !!!country_labels),
    marital_status = recode(
      # coding for unknown different than what is specified in the layout page
      Marital_Status_Recoded,
      `1` = "Married; Unmarried or Domestic Partner",
      `2` = "Single, Separated, Divorced or Widowed",
      `9` = "Unknown"),
    primary_payer = recode(
      Dx_Primary_Payor_Recoded,
      `01` = "Not insured",
      `02` = "Insurance",
      `03` = "Medicaid",
      `04` = "Medicare",
      `05` = "Tricare",
      `99` = "Unknown"),
    seer_stage_2000 = recode(SEER_Summ_Stage_2000_N759, !!!seer_stage_labels),
    seer_stage_1977 = recode(SEER_Summ_Stage_1977_N760, !!!seer_stage_labels),
    # Prefer the 2000 staging; fall back to 1977 where the 2000 value is NA.
    seer_stage = coalesce(seer_stage_2000, seer_stage_1977),
    fcds_site_group = recode(
      # Using most-specific summary-level group from
      # https://seer.cancer.gov/siterecode/icdo3_dwhoheme/
      # (i.e. level 2 header for a level 3 item, if all level 3 items are
      # grouped together)
      FCDS_Site_Group,
      `0110` = "Oral Cavity and Pharynx",
      `0011` = "Esophagus",
      `0012` = "Stomach",
      `0013` = "Small Intestine",
      `1422` = "Colon excluding Rectum",
      `2324` = "Rectum and Rectosigmoid Junction",
      `0025` = "Anus, Anal Canal & Anorectum",
      `2627` = "Liver and Intrahepatic Bile Duct",
      `0028` = "Gall Bladder",
      `0029` = "Other Biliary",
      `0030` = "Pancreas",
      `3133` = "Retroperitoneum, Peritoneum, Omentum & Mesentery, Other Digestive Organs",
      ## Where is Nose, Nasal Cavity and Middle Ear?
      `0035` = "Larynx",
      `0036` = "Lung and Bronchus",
      ## Where are Pleura, and "Trachea ... and Other Respiratory"?
      `0039` = "Bones and Joints",
      `0040` = "Soft Tissue including Heart",
      `0041` = "Melanoma of the Skin",
      `0042` = "Other Non-Epithelial Skin",
      `0043` = "Breast",
      `0044` = "Cervix Uteri",
      `4546` = "Corpus and Uterus, NOS",
      `0047` = "Ovary",
      `4850` = "Vagina, Vulva, Other Female Genital Organs",
      `0051` = "Prostate Gland",
      `0052` = "Testes",
      ## Where are Penis and other Male Genital Organs?
      `0055` = "Urinary Bladder",
      `0056` = "Kidney & Renal Pelvis",
      `5758` = "Ureter, Other Urinary Organs",
      `0059` = "Eye and Orbit",
      `6061` = "Brain and Other Nervous System",
      `0062` = "Thyroid Gland",
      ## Missing "Other Endocrine including Thymus" (possibly in Benign/Borderline)
      `6465` = "Hodgkin Lymphoma",
      `6667` = "Non-Hodgkin Lymphoma",
      `0068` = "Multiple Myeloma",
      `6971` = "Lymphocytic Leukemia",
      `7275` = "Myeloid and Monocytic Leukemia",
      `7677` = "Other Leukemia",
      `0078` = "Mesothelioma",
      `0079` = "Kaposi Sarcoma",
      `0080` = "Other",
      `8183` = "Benign/Borderline (Brain, Nervous System, Endocrine)"),
    fcds_site_specific = recode(
      # https://fcds.med.miami.edu/downloads/datarequest/STAT%202018%20layout.pdf
      FCDS_Site_Group,
      `0110` = paste("Lip, Tongue, Salivary Glands , Floor of Mouth, Gum & Other Mouth,",
                     "Nasopharynx, Tonsil, Oropharynx, Hypopharynx, Other Buccal Cavity & Pharynx"),
      `0011` = "Esophagus",
      `0012` = "Stomach",
      `0013` = "Small Intestine",
      `1422` = paste("Cecum, Appendix, Ascending Colon, Hepatic Flexure, Transverse Colon,",
                     "Splenic Flexure, Descending Colon, Sigmoid Colon, Large Intestine, NOS"),
      `2324` = "Rectosigmoid Junction, Rectum",
      `0025` = "Anus, Anal Canal & Anorectum",
      `2627` = "Liver, Intrahepatic Bile Duct",
      `0028` = "Gall Bladder",
      `0029` = "Other Biliary",
      `0030` = "Pancreas",
      `3133` = "Retroperitoneum, Peritoneum, Omentum & Mesentery, Other Digestive Organs",
      `0035` = "Larynx",
      `0036` = "Lung & Bronchus",
      `0039` = "Bones & Joints",
      `0040` = "Soft Tissue (Including Heart)",
      `0041` = "Melanoma of the Skin",
      `0042` = "Other Non-Epithelial Skin",
      `0043` = "Breast",
      `0044` = "Cervix Uteri",
      `4546` = "Corpus Uteri, Uterus, NOS",
      `0047` = "Ovary",
      `4850` = "Vagina, Vulva, Other Female Genital Organs",
      `0051` = "Prostate Gland",
      `0052` = "Testes",
      `0055` = "Urinary Bladder",
      `0056` = "Kidney & Renal Pelvis",
      `5758` = "Ureter, Other Urinary Organs",
      `0059` = "Eye & Orbit",
      `6061` = "Brain, Other Nervous System",
      `0062` = "Thyroid Gland",
      `6465` = "Hodgkin's Disease Nodal, Hodgkin's Disease Extra Nodal",
      `6667` = "Non-Hodgkin's Nodal, Non-Hodgkin's Extra Nodal",
      `0068` = "Multiple Myeloma",
      `6971` = "Acute Lymphocytic Leukemia, Chronic Lymphocytic Leukemia, Other Lymphocytic Leukemia",
      `7275` = "Acute Myeloid Leukemia, Chronic Myeloid Leukemia, Other Myeloid/Monocytic Leukemia, Acute Monocytic Leukemia",
      `7677` = "Other Acute Leukemia, Aleukemic, Subleukemic & NOS",
      `0078` = "Mesothelioma",
      `0079` = "Kaposi Sarcoma",
      `0080` = "Other",
      `8183` = "Benign/Borderline- Brain, Cranial Nerves Other Nervous System, Other Endocrine including Thymus (Benign/Border)"),
    # Each age-group code is the upper bound of its band (85 stands for "85+").
    age_group = recode(
      FCDS_Age_Group,
      `4` = "0 - 4",
      `9` = "5 - 9",
      `14` = "10 - 14",
      `19` = "15 - 19",
      `24` = "20 - 24",
      `29` = "25 - 29",
      `34` = "30 - 34",
      `39` = "35 - 39",
      `44` = "40 - 44",
      `49` = "45 - 49",
      `54` = "50 - 54",
      `59` = "55 - 59",
      `64` = "60 - 64",
      `69` = "65 - 69",
      `74` = "70 - 74",
      `79` = "75 - 79",
      `84` = "80 - 84",
      `85` = "85+",
      `999` = "Unknown"),
    # The raw county code is kept as-is next to its decoded name.
    county_fips = County_at_DX_N90,
    county_name = recode(
      County_at_DX_N90,
      `1` = "Alachua",
      `3` = "Baker",
      `5` = "Bay",
      `7` = "Bradford",
      `9` = "Brevard",
      `11` = "Broward",
      `13` = "Calhoun",
      `15` = "Charlotte",
      `17` = "Citrus",
      `19` = "Clay",
      `21` = "Collier",
      `23` = "Columbia",
      `27` = "DeSoto",
      `29` = "Dixie",
      `31` = "Duval",
      `33` = "Escambia",
      `35` = "Flagler",
      `37` = "Franklin",
      `39` = "Gadsden",
      `41` = "Gilchrist",
      `43` = "Glades",
      `45` = "Gulf",
      `47` = "Hamilton",
      `49` = "Hardee",
      `51` = "Hendry",
      `53` = "Hernando",
      `55` = "Highlands",
      `57` = "Hillsborough",
      `59` = "Holmes",
      `61` = "Indian River",
      `63` = "Jackson",
      `65` = "Jefferson",
      `67` = "Lafayette",
      `69` = "Lake",
      `71` = "Lee",
      `73` = "Leon",
      `75` = "Levy",
      `77` = "Liberty",
      `79` = "Madison",
      `81` = "Manatee",
      `83` = "Marion",
      `85` = "Martin",
      `86` = "Miami-Dade",
      `87` = "Monroe",
      `89` = "Nassau",
      `91` = "Okaloosa",
      `93` = "Okeechobee",
      `95` = "Orange",
      `97` = "Osceola",
      `99` = "Palm Beach",
      `101` = "Pasco",
      `103` = "Pinellas",
      `105` = "Polk",
      `107` = "Putnam",
      `113` = "Santa Rosa",
      `115` = "Sarasota",
      `117` = "Seminole",
      `109` = "St. Johns",
      `111` = "St. Lucie",
      `119` = "Sumter",
      `121` = "Suwannee",
      `123` = "Taylor",
      `125` = "Union",
      `127` = "Volusia",
      `129` = "Wakulla",
      `131` = "Walton",
      `133` = "Washington",
      `999` = "Unknown"),
    grade = recode(
      Grade_N440,
      `1` = "Grade I",
      `2` = "Grade II",
      `3` = "Grade III",
      `4` = "Grade IV",
      `5` = "T-cell",
      `6` = "B-cell",
      `7` = "Null cell",
      `8` = "NK cell",
      `9` = "Unknown"),
    laterality = recode(
      Laterality_N410,
      `0` = "Not a paired site",
      `1` = "Right:origin of primary",
      `2` = "Left:origin of primary",
      `3` = "Only one side involved, right or left origin unspecified",
      `4` = paste("Bilateral involvement at time of diagnosis, lateral origin unknown for a single primary;",
                  "or both ovaries involved simultaneously, single histology; bilateral retinoblastomas;",
                  "bilateral Wilms' tumors"),
      `5` = "Paired site: midline tumor",
      `9` = "Paired site, but no information concerning laterality"),
    dx_confirmation = recode(
      Diagnostic_Confirmation_N490,
      `1` = "Positive histology",
      `2` = "Positive cytology",
      `3` = "Positive histology PLUS - positive immunophenotyping AND/OR positive genetic studies",
      `4` = "Positive microscopic confirmation, method not specified",
      `5` = "Positive laboratory test/marker study",
      `6` = "Direct visualization without microscopic confirmation",
      `7` = "Radiography and/or other imaging techniques without microscopic confirmation",
      `8` = "Clinical diagnosis only",
      `9` = "Unknown whether or not microscopically confirmed; death certificate only"),
    reporting_source = recode(
      Type_of_Reporting_Source_N500,
      `1` = "Hospital inpatient",
      `2` = "Radiation Treatment Centers or Medical Oncology Centers",
      `3` = "Laboratory only",
      `4` = "Physician's office/private medical practitioner",
      `5` = "Nursing/convalescent home/hospice",
      `6` = "Autopsy only",
      `7` = "Death certificate only",
      `8` = "Other hospital outpatient units/surgery centers"),
    cancer_status = recode(
      Cancer_Status_N1770,
      `1` = "No evidence of tumor",
      `2` = "Evidence of tumor",
      `9` = "Unknown"),
    icd03_conversion = recode(
      # Code specifying how the conversion of site and morphology codes from
      # ICD-O-2 to ICD-O-3 was accomplished.
      ICDO3_Conversion_FL_N2116,
      `0` = "Originally coded in ICD-O-3",
      `1` = "Converted without review",
      `3` = "Converted with review"),
    seer_stage_derived_1977 = recode(
      Derived_SS1977_FL_N3040, !!!seer_stage_source_labels),
    seer_stage_derived_2000 = recode(
      Derived_SS2000_FL_N3050, !!!seer_stage_source_labels),
    tobacco_cigarette = recode(
      FCDS_Tob_Use_Cigarette_N1300, !!!tobacco_use_labels),
    tobacco_other = recode(
      FCDS_Tob_Use_OthSmoke_N1300, !!!tobacco_use_labels),
    tobacco_smokeless = recode(
      FCDS_Tob_Use_Smokeless_Tob_N1300, !!!tobacco_use_labels),
    tobacco_no = recode(
      FCDS_Tob_Use_NOS_N1300, !!!tobacco_use_labels),
    behavior = recode(
      Behavior_Code_ICDO3_N523,
      `0` = "Benign",
      `1` = "Borderline",
      `2` = "Insitu",
      `3` = "Invasive"),
    # "<histologic type>/<behavior code>"; presumably the same format as the
    # `Histology/Behavior` key of the ICD-O-3 lookup -- TODO confirm.
    morphology = paste0(Histologic_Type_ICDO3_N522, "/", Behavior_Code_ICDO3_N523)
  ) %>%
  # Keep the patient identifier and only the columns created above.
  select(patient_id = Patient_ID_N20, dx_year:morphology)

saveRDS(dat, file = here("data/stat_dataset_2018_clean.rds"), compress = "gzip")
# Add the following code to your website.
# For more information on customizing the embed code, read Embedding Snippets.