R/mutate_nkbcind_d_vars.R

Defines functions mutate_nkbcind_d_vars

#' Add derived (`d_`-prefixed) variables to a data frame
#'
#' Wraps a single `dplyr::mutate()` call that derives labelled factors
#' (Swedish labels, plus English labels in the `_en` variants), categorised
#' measurements and coalesced hospital codes from the raw columns of `x`.
#'
#' Besides the raw registry columns (e.g. `KON_VALUE`, `a_tnm_tklass_Varde`,
#' `op_pad_invstl`, `VITALSTATUS`), the columns `d_prim_beh_Varde`,
#' `d_invasiv_Varde`, `d_er_Varde`, `d_pr_Varde`, `d_her2_Varde` and
#' `d_trigrp_Varde` are read but not created here, so they are expected to
#' exist in `x` already.
#'
#' @param x A data frame (or other object accepted by `dplyr::mutate()`)
#'   containing the columns referenced in the function body.
#' @param ... Currently unused.
#'
#' @return The result of `dplyr::mutate()` on `x`, i.e. `x` with the derived
#'   `d_*` columns added.
#'
#' @export
mutate_nkbcind_d_vars <- function(x, ...) {
  dplyr::mutate(x,

    # Sex
    d_kon = factor(
      KON_VALUE,
      levels = c(1, 2),
      labels = c("Män", "Kvinnor")
    ),
    d_kon_en = factor(
      KON_VALUE,
      levels = c(1, 2),
      labels = c("Men", "Women")
    ),

    # Mode of detection (code 98 is treated as missing)
    d_screening = factor(
      dplyr::na_if(a_diag_screening_Varde, 98),
      levels = c(1, 0),
      labels = c("Screeningupptäckt", "Kliniskt upptäckt")
    ),
    d_screening_en = factor(
      dplyr::na_if(a_diag_screening_Varde, 98),
      levels = c(1, 0),
      labels = c("Screening-detected", "Clinically detected")
    ),

    # Primary treatment
    d_prim_beh = factor(
      d_prim_beh_Varde,
      levels = c(1, 2, 3),
      labels = c(
        "Primär operation",
        "Preoperativ onkologisk behandling eller konservativ behandling",
        "Ej operation eller fjärrmetastaser vid diagnos"
      )
    ),
    d_prim_beh_en = factor(
      d_prim_beh_Varde,
      levels = c(1, 2, 3),
      labels = c(
        "Primary surgery",
        "Preoperative oncological treatment or conservative treatment",
        "No surgery or distant metastasis at diagnosis"
      )
    ),

    # Planned management
    d_a_planbeh_typ = factor(
      a_planbeh_typ_Varde,
      levels = 1:8,
      labels = c(
        "Primär operation",
        "Preoperativ onkologisk behandling eller konservativ behandling",
        "Ej operation eller fjärrmetastaser vid diagnos",
        "Preoperativ onkologisk behandling, cytostatika + ev. annan behandling (operation planeras)",
        "Preoperativ onkologisk behandling, endast endokrin terapi (operation planeras)",
        "Konservativ behandling (oklart om operation blir aktuell/ operation planeras ej)",
        "Fjärrmetastaserande sjukdom",
        "Ingen behandling (patienten avböjt/ annat skäl)"
      )
    ),
    d_a_planbeh_typ_en = factor(
      a_planbeh_typ_Varde,
      levels = 1:8,
      labels = c(
        "Primary surgery",
        "Preoperative oncological treatment or conservative treatment",
        "No surgery or distant metastasis at diagnosis",
        "Preoperative oncological treatment, chemotherapy + any other treatment (surgery is planned)",
        "Preoperative oncological treatment, endocrine treatment only (surgery is planned)",
        "Conservative treatment (unclear if surgery becomes relevant/ surgery is not planned)",
        "Metastatic disease",
        "No treatment (patient declined/ other reason)"
      )
    ),

    # Invasiveness
    d_invasiv = factor(
      d_invasiv_Varde,
      levels = c(1, 2),
      labels = c("Invasiv cancer", "Enbart cancer in situ")
    ),
    d_invasiv_en = factor(
      d_invasiv_Varde,
      levels = c(1, 2),
      labels = c("Invasive cancer", "Cancer in situ only")
    ),

    # Histological grade (invasive) or nuclear atypia grade (cancer in situ);
    # codes 97 and 98 are treated as missing
    d_op_pad_nhg = factor(
      dplyr::case_when(
        op_pad_nhg_Varde %in% c(97, 98, NA) ~ NA_integer_,
        TRUE ~ op_pad_nhg_Varde
      ),
      levels = c(1, 2, 3),
      labels = c("Grad 1", "Grad 2", "Grad 3")
    ),
    d_op_pad_nhg_en = factor(
      dplyr::case_when(
        op_pad_nhg_Varde %in% c(97, 98, NA) ~ NA_integer_,
        TRUE ~ op_pad_nhg_Varde
      ),
      levels = c(1, 2, 3),
      labels = c("Grade 1", "Grade 2", "Grade 3")
    ),

    # ER status
    d_er = factor(
      d_er_Varde,
      levels = c(1, 2),
      labels = c("Positiv", "Negativ")
    ),
    d_er_en = factor(
      d_er_Varde,
      levels = c(1, 2),
      labels = c("Positive", "Negative")
    ),

    # PgR status
    d_pr = factor(
      d_pr_Varde,
      levels = c(1, 2),
      labels = c("Positiv", "Negativ")
    ),
    d_pr_en = factor(
      d_pr_Varde,
      levels = c(1, 2),
      labels = c("Positive", "Negative")
    ),

    # HER2 status
    d_her2 = factor(
      d_her2_Varde,
      levels = c(1, 2),
      labels = c("Positiv", "Negativ")
    ),
    d_her2_en = factor(
      d_her2_Varde,
      levels = c(1, 2),
      labels = c("Positive", "Negative")
    ),

    # HER2 IHC: op_* value when d_prim_beh_Varde is 1, a_* value when it is
    # 2 or 3, otherwise NA
    d_her2ihc_Varde = dplyr::case_when(
      d_prim_beh_Varde == 1 ~ op_pad_her2_Varde,
      d_prim_beh_Varde %in% c(2, 3) ~ a_pad_her2_Varde
    ),

    # HER2 ISH (same source selection as HER2 IHC)
    d_her2ish_Varde = dplyr::case_when(
      d_prim_beh_Varde == 1 ~ op_pad_her2ish_Varde,
      d_prim_beh_Varde %in% c(2, 3) ~ a_pad_her2ish_Varde
    ),

    # Biological subtype
    d_trigrp = factor(
      d_trigrp_Varde,
      levels = c(3, 2, 1),
      labels = c("Trippelnegativ", "HER2-positiv", "Luminal")
    ),
    d_trigrp_en = factor(
      d_trigrp_Varde,
      levels = c(3, 2, 1),
      labels = c("Triple negative", "HER2 positive", "Luminal")
    ),

    # Ki67 (same source selection as HER2 IHC)
    d_pad_ki67proc = dplyr::case_when(
      d_prim_beh_Varde == 1 ~ op_pad_ki67proc,
      d_prim_beh_Varde %in% c(2, 3) ~ a_pad_ki67proc
    ),

    # Clinical T stage (TNM), dichotomised; code 50 and NA become missing
    d_tstad = factor(
      dplyr::case_when(
        a_tnm_tklass_Varde == 0 ~ 1L,
        a_tnm_tklass_Varde == 5 ~ 1L,
        a_tnm_tklass_Varde == 10 ~ 1L,
        a_tnm_tklass_Varde == 20 ~ 2L,
        a_tnm_tklass_Varde == 30 ~ 2L,
        a_tnm_tklass_Varde == 42 ~ 2L,
        a_tnm_tklass_Varde == 44 ~ 2L,
        a_tnm_tklass_Varde == 45 ~ 2L,
        a_tnm_tklass_Varde == 46 ~ 2L,
        a_tnm_tklass_Varde == 50 ~ NA_integer_,
        is.na(a_tnm_tklass_Varde) ~ NA_integer_
      ),
      levels = c(1, 2),
      labels = c("<=20mm (T0/Tis/T1)", ">20mm (T2-T4)")
    ),
    d_tstad_en = factor(
      dplyr::case_when(
        a_tnm_tklass_Varde == 0 ~ 1L,
        a_tnm_tklass_Varde == 5 ~ 1L,
        a_tnm_tklass_Varde == 10 ~ 1L,
        a_tnm_tklass_Varde == 20 ~ 2L,
        a_tnm_tklass_Varde == 30 ~ 2L,
        a_tnm_tklass_Varde == 42 ~ 2L,
        a_tnm_tklass_Varde == 44 ~ 2L,
        a_tnm_tklass_Varde == 45 ~ 2L,
        a_tnm_tklass_Varde == 46 ~ 2L,
        a_tnm_tklass_Varde == 50 ~ NA_integer_,
        is.na(a_tnm_tklass_Varde) ~ NA_integer_
      ),
      levels = c(1, 2),
      labels = c("<=20mm (T0/Tis/T1)", ">20mm (T2-T4)")
    ),

    # Clinical N stage (TNM), dichotomised; code 40 and NA become missing
    d_nstad = factor(
      dplyr::case_when(
        a_tnm_nklass_Varde == 0 ~ 1L,
        a_tnm_nklass_Varde == 10 ~ 2L,
        a_tnm_nklass_Varde == 20 ~ 2L,
        a_tnm_nklass_Varde == 30 ~ 2L,
        a_tnm_nklass_Varde == 40 ~ NA_integer_,
        is.na(a_tnm_nklass_Varde) ~ NA_integer_
      ),
      levels = c(1, 2),
      labels = c("Nej (cN-)", "Ja (cN+)")
    ),
    d_nstad_en = factor(
      dplyr::case_when(
        a_tnm_nklass_Varde == 0 ~ 1L,
        a_tnm_nklass_Varde == 10 ~ 2L,
        a_tnm_nklass_Varde == 20 ~ 2L,
        a_tnm_nklass_Varde == 30 ~ 2L,
        a_tnm_nklass_Varde == 40 ~ NA_integer_,
        is.na(a_tnm_nklass_Varde) ~ NA_integer_
      ),
      levels = c(1, 2),
      labels = c("No (cN-)", "Yes (cN+)")
    ),

    # Clinical M stage (TNM); code 20 and NA become missing
    d_mstad = factor(
      dplyr::case_when(
        a_tnm_mklass_Varde == 0 ~ 1L,
        a_tnm_mklass_Varde == 10 ~ 2L,
        a_tnm_mklass_Varde == 20 ~ NA_integer_,
        is.na(a_tnm_mklass_Varde) ~ NA_integer_
      ),
      levels = c(1, 2),
      labels = c("Nej (M0)", "Ja (M1)")
    ),
    d_mstad_en = factor(
      dplyr::case_when(
        a_tnm_mklass_Varde == 0 ~ 1L,
        a_tnm_mklass_Varde == 10 ~ 2L,
        a_tnm_mklass_Varde == 20 ~ NA_integer_,
        is.na(a_tnm_mklass_Varde) ~ NA_integer_
      ),
      levels = c(1, 2),
      labels = c("No (M0)", "Yes (M1)")
    ),

    # Clinical stage.
    # `%in%` is used instead of `==` so that a missing T/N/M value yields FALSE
    # (not NA) and the row falls through to the final NA branch.
    # case_when is namespace-qualified like every other dplyr call here, so the
    # function works without dplyr being attached.
    d_tnm_stadium_subgrp =
      factor(
        dplyr::case_when(
          # Stage IV
          # (any T, any N)
          a_tnm_mklass_Varde %in% 10 ~ "IV",
          # Stage IIIC
          # (any T)
          a_tnm_mklass_Varde %in% 00 &
            a_tnm_nklass_Varde %in% 30 ~ "IIIC",
          # Stage IIIB
          a_tnm_mklass_Varde %in% 00 &
            a_tnm_nklass_Varde %in% c(00, 10, 20) &
            a_tnm_tklass_Varde %in% c(42, 44, 45, 46) ~ "IIIB",
          # Stage IIIA
          a_tnm_mklass_Varde %in% 00 &
            a_tnm_nklass_Varde %in% 20 &
            a_tnm_tklass_Varde %in% c(00, 10, 20, 30) ~ "IIIA",
          a_tnm_mklass_Varde %in% 00 &
            a_tnm_nklass_Varde %in% 10 &
            a_tnm_tklass_Varde %in% 30 ~ "IIIA",
          # Stage IIB
          a_tnm_mklass_Varde %in% 00 &
            a_tnm_nklass_Varde %in% 10 &
            a_tnm_tklass_Varde %in% 20 ~ "IIB",
          a_tnm_mklass_Varde %in% 00 &
            a_tnm_nklass_Varde %in% 00 &
            a_tnm_tklass_Varde %in% 30 ~ "IIB",
          # Stage IIA
          a_tnm_mklass_Varde %in% 00 &
            a_tnm_nklass_Varde %in% 10 &
            a_tnm_tklass_Varde %in% c(00, 10) ~ "IIA",
          a_tnm_mklass_Varde %in% 00 &
            a_tnm_nklass_Varde %in% 00 &
            a_tnm_tklass_Varde %in% 20 ~ "IIA",
          # Stage IB
          # (N1mi is not included in a_tnm_nklass, so IB cannot be derived)
          # Stage IA
          a_tnm_mklass_Varde %in% 00 &
            a_tnm_nklass_Varde %in% 00 &
            a_tnm_tklass_Varde %in% 10 ~ "IA",
          # Stage 0
          a_tnm_mklass_Varde %in% 00 &
            a_tnm_nklass_Varde %in% 00 &
            a_tnm_tklass_Varde %in% 05 ~ "0",
          # Reported as "T0N0M0"; the instructions were changed in 2013
          a_tnm_mklass_Varde %in% 00 &
            a_tnm_nklass_Varde %in% 00 &
            a_tnm_tklass_Varde %in% 00 ~ '"T0N0M0"',
          # Otherwise
          TRUE ~ NA_character_
        ),
        levels = c(
          "0", "IA", "IIA", "IIB", "IIIA", "IIIB", "IIIC", "IV", '"T0N0M0"'
        )
      ),
    # Collapse the sub-stages into main stages
    d_tnm_stadium = forcats::fct_collapse(
      d_tnm_stadium_subgrp,
      "0" = "0",
      "I" = "IA",
      "II" = c("IIA", "IIB"),
      "III" = c("IIIA", "IIIB", "IIIC"),
      "IV" = "IV",
      '"T0N0M0"' = '"T0N0M0"'
    ),

    # Tumour size at (primary) surgery, categories; only populated when
    # d_prim_beh_Varde is 1
    d_op_pad_invstl_kat =
      cut(
        dplyr::if_else(d_prim_beh_Varde %in% 1, op_pad_invstl, NA_integer_),
        breaks = c(-Inf, 20, 50, Inf),
        labels = c("<=20 mm", "21-50 mm", ">50 mm")
      ),
    d_op_pad_invstl_kat_en =
      cut(
        dplyr::if_else(d_prim_beh_Varde %in% 1, op_pad_invstl, NA_integer_),
        breaks = c(-Inf, 20, 50, Inf),
        labels = c("<=20 mm", "21-50 mm", ">50 mm")
      ),

    # Tumour size at (primary) surgery, dichotomised with cut-off 10 mm
    d_op_pad_invstl_diko10 = cut(
      dplyr::if_else(d_prim_beh_Varde %in% 1, op_pad_invstl, NA_integer_),
      breaks = c(-Inf, 10, Inf),
      labels = c("<=10 mm", ">10 mm")
    ),
    d_op_pad_invstl_diko10_en = cut(
      dplyr::if_else(d_prim_beh_Varde %in% 1, op_pad_invstl, NA_integer_),
      breaks = c(-Inf, 10, Inf),
      labels = c("<=10 mm", ">10 mm")
    ),

    # Max extent: row-wise maximum of the two extents, ignoring a missing one
    d_max_extent = pmax(op_pad_extentx, op_pad_extenty, na.rm = TRUE),

    # Pathological N stage
    d_pnstat =
      factor(
        dplyr::case_when(
          op_pad_lglusant > 0 & op_pad_lglmetant == 0 ~ "Nej (pN-)",
          op_pad_lglmetant > 0 ~ "Ja (pN+)"
        ),
        levels = c("Nej (pN-)", "Ja (pN+)")
      ),
    d_pnstat_en =
      factor(
        dplyr::case_when(
          op_pad_lglusant > 0 & op_pad_lglmetant == 0 ~ "No (pN-)",
          op_pad_lglmetant > 0 ~ "Yes (pN+)"
        ),
        levels = c("No (pN-)", "Yes (pN+)")
      ),
    # Intervals are [1, 4) and [4, 100]; values outside them become NA
    d_pn = cut(op_pad_lglmetant, c(1, 4, 100),
      include.lowest = TRUE,
      right = FALSE,
      labels = c("1-3 metastaser", "=> 4 metastaser")
    ),
    d_pn_en = cut(op_pad_lglmetant, c(1, 4, 100),
      include.lowest = TRUE,
      right = FALSE,
      labels = c("1-3 metastases", "=> 4 metastases")
    ),

    # Final result of breast surgery, categorised
    d_op_kir_brost_kat = factor(
      dplyr::case_when(
        op_kir_brost_Varde %in% 1 ~ 1L,
        op_kir_brost_Varde %in% c(2, 4) ~ 2L
      ),
      levels = c(1, 2),
      labels = c("Partiellt mastektomi", "Mastektomi")
    ),

    # Operating hospital and, if that is missing, notifying hospital
    d_opans_sjhkod = dplyr::coalesce(
      op_inr_sjhkod,
      a_inr_sjhkod
    ),

    # Operating hospital for primarily operated cases, otherwise notifying
    # hospital
    d_pat_sjhkod = dplyr::case_when(
      d_prim_beh_Varde == 1 ~ op_inr_sjhkod,
      d_prim_beh_Varde %in% c(2, 3) ~ a_inr_sjhkod
    ),

    # Hospital responsible for primary treatment (NA unless d_prim_beh_Varde
    # is 1 or 2)
    d_prim_beh_sjhkod = dplyr::case_when(
      d_prim_beh_Varde == 1 ~ op_inr_sjhkod,
      d_prim_beh_Varde == 2 ~ pre_inr_sjhkod
    ),

    # Hospital where oncological treatment is given
    d_onk_sjhkod = dplyr::coalesce(
      post_inr_sjhkod,
      pre_inr_sjhkod
    ),

    # Hospital codes for post-/preoperative oncological treatment: the first
    # non-missing value among the listed columns, in the order given
    d_onkpostans_sjhkod = dplyr::coalesce(
      post_inr_sjhkod,
      op_onk_sjhkod,
      a_onk_rappsjhkod,
      a_onk_sjhkod,
      a_inr_sjhkod
    ),
    d_onkpreans_sjhkod = dplyr::coalesce(
      pre_inr_sjhkod,
      op_onk_sjhkod,
      a_onk_rappsjhkod,
      a_onk_sjhkod,
      a_inr_sjhkod
    ),

    # Hospital code for follow-up reporting: the first non-missing value among
    # the listed columns, in the order given
    d_uppfans_sjhkod = dplyr::coalesce(
      op_uppf_sjhkod,
      a_uppf_sjhkod,
      a_onk_sjhkod,
      op_onk_sjhkod,
      a_onk_rappsjhkod,
      a_kir_sjhkod,
      a_inr_sjhkod
    ),

    # Chemotherapy: row-wise maximum of the post- and pre-operative columns,
    # ignoring a missing one, converted to logical
    d_kemo = as.logical(pmax(post_kemo_Varde, pre_kemo_Varde, na.rm = TRUE)),

    # LKF region, used for imputation when the region of the hospital is
    # missing
    d_region_lkf = dplyr::case_when(
      REGION_NAMN == "Region Sthlm/Gotland" ~ 1L,
      REGION_NAMN == "Region Uppsala/Örebro" ~ 2L,
      REGION_NAMN == "Region Sydöstra" ~ 3L,
      REGION_NAMN == "Region Syd" ~ 4L,
      REGION_NAMN == "Region Väst" ~ 5L,
      REGION_NAMN == "Region Norr" ~ 6L
    ),

    # Vital status
    d_vitalstatus = factor(VITALSTATUS,
      levels = c(0, 1),
      labels = c("Levande", "Avlidna")
    ),
    # "Avlidna" means deceased, so the English label is "Deceased"
    d_vitalstatus_en = factor(VITALSTATUS,
      levels = c(0, 1),
      labels = c("Alive", "Deceased")
    )
  )
}
oc1lojo/nkbcind documentation built on Sept. 30, 2022, 10:06 p.m.