Code for cleaning and recoding data.^[See the childRmd/_09cleanRecode.Rmd
file for other code.]
# Standardise the column names of `mydata` with janitor.
mydata <- mydata %>%
  janitor::clean_names()

# Handy when inspecting or renaming columns by hand:
# cat(names(mydata), sep = ",\n")
# names(mydata) <- c(names(mydata)[1:21], paste0("Soru", 1:30))
# Demo on `iris`: run Sepal.Length through explore::clean_var() with the
# bounds, NA value and new name given below, then describe the result.
iris %>%
  explore::clean_var(
    var = Sepal.Length,
    min_val = 4.5,
    max_val = 7.0,
    na = 5.8,
    name = "sepal_length"
  ) %>%
  describe()
# View the summarytools data-frame summary of `mydata`.
summarytools::view(summarytools::dfSummary(mydata))

# Print a compact overview of the columns of `mydata`.
dplyr::glimpse(mydata)
# questionr helpers, called here without arguments.
# NOTE(review): these appear to be the interactive recode / reorder / cut
# add-ins, meant to be run by hand in an interactive session — confirm.
questionr::irec()
questionr::iorder()
questionr::icut()
# Three ways to add a row-wise sum column `sumVar` to `iris`.

# 1. Sum columns 1 to 4, selected by position.
iris %>% mutate(sumVar = rowSums(.[1:4]))

# 2. Sum the columns whose name contains "Sepal"; show the first rows.
iris %>% mutate(sumVar = rowSums(select(., contains("Sepal")))) %>% head

# 3. Same as 2, with rowSums() applied at the end of an inner pipe.
iris %>% mutate(sumVar = select(., contains("Sepal")) %>% rowSums()) %>% head
library(finalfit)

# Background:
# https://www.datasurg.net/2019/10/15/jama-retraction-after-miscoding-new-finalfit-function-to-check-recoding/

# Cross-tabulate the original sex factor against a recoded copy whose labels
# are intentionally miscoded, so the swap shows up in the counts.
colon_s %>%
  mutate(
    sex.factor2 = sex.factor %>%
      forcats::fct_recode(F = "Male", M = "Female")
  ) %>%
  count(sex.factor, sex.factor2)
# Install
# devtools::install_github('ewenharrison/finalfit')
library(finalfit)
library(dplyr)

# Recode example: drop three columns, collapse two age bands into one and
# add a sex factor whose labels are swapped on purpose.
colon_s_small <- colon_s %>%
  select(-c(id, rx, rx.factor)) %>%
  mutate(
    age.factor2 = forcats::fct_collapse(
      age.factor,
      `<60 years` = c("<40 years", "40-59 years")
    ),
    sex.factor2 = forcats::fct_recode(
      sex.factor,
      # Intentional miscode
      F = "Male",
      M = "Female"
    )
  )

# Check the recoded columns against their sources.
colon_s_small %>%
  finalfit::check_recode()
# Run the recode check again with numeric variables included, after
# excluding a few columns from the comparison; keep the result in `out`.
out <- colon_s_small %>%
  select(-c(extent, extent.factor, time, time.years)) %>%
  check_recode(include_numerics = TRUE)
## Recoding mydata$cinsiyet into mydata$Cinsiyet
## ("K" -> "Kadin", "E" -> "Erkek"), stored as a factor.
mydata$Cinsiyet <- factor(
  recode(mydata$cinsiyet, K = "Kadin", E = "Erkek")
)
## Recoding mydata$tumor_yerlesimi into mydata$TumorYerlesimi.
## The factor levels are given explicitly to fix their order; any value that
## is not one of these five levels becomes NA.
mydata$TumorYerlesimi <- mydata$tumor_yerlesimi %>%
  recode(
    "proksimal" = "Proksimal",
    "distal" = "Distal",
    "yaygın" = "Yaygin",
    "gö bileşke" = "GEJ",
    "antrum" = "Antrum"
  ) %>%
  factor(levels = c("GEJ", "Proksimal", "Antrum", "Distal", "Yaygin"))
## Recoding mydata$histolojik_alt_tip into mydata$HistolojikAltTip:
## only the one label with a non-ASCII character is rewritten.
mydata$HistolojikAltTip <- factor(
  recode(
    mydata$histolojik_alt_tip,
    "medüller benzeri" = "meduller benzeri"
  )
)
## Recoding mydata$lauren_siniflamasi into mydata$Lauren.
## NOTE(review): the "???" key is reproduced exactly as found; it may be a
## placeholder for a label that lost its encoding — confirm against the data.
mydata$Lauren <- mydata$lauren_siniflamasi %>%
  recode("diffüz" = "diffuse", "???" = "medullary") %>%
  factor()
## Recoding mydata$histolojik_derece into mydata$Grade, with the levels
## ordered "iyi", "orta", "az".
mydata$Grade <- mydata$histolojik_derece %>%
  recode(
    "az diferansiye" = "az",
    "iyi diferansiye" = "iyi",
    "orta diferansiye" = "orta"
  ) %>%
  factor(levels = c("iyi", "orta", "az"))
# Split the combined stage string in `patolojik_evre` into three parts:
#   Tstage - everything before the (last) "N"
#   Nstage - "N" plus the text between it and the following "M"
#   Mstage - "M" plus the text after it
# The stage letter is captured inside the regex group rather than pasted on
# afterwards, so a string that does not match yields a real NA instead of the
# literal text "NNA" / "MNA".
mydata$Tstage <- stringr::str_match(mydata$patolojik_evre, "(.+)N")[, 2]
mydata$Nstage <- stringr::str_match(mydata$patolojik_evre, "(N.+)M")[, 2]
mydata$Mstage <- stringr::str_match(mydata$patolojik_evre, "(M.+)")[, 2]
# Collapse the detailed stage strings into their main categories.
# Within each case_when() the patterns are tried in order and the first hit
# wins; anything that matches none of them (including NA) falls through to
# the "x" category.
mydata <- mydata %>%
  dplyr::mutate(
    T_stage = dplyr::case_when(
      grepl("T1", Tstage) ~ "T1",
      grepl("T2", Tstage) ~ "T2",
      grepl("T3", Tstage) ~ "T3",
      grepl("T4", Tstage) ~ "T4",
      TRUE ~ "Tx"
    ),
    N_stage = dplyr::case_when(
      grepl("N0", Nstage) ~ "N0",
      grepl("N1", Nstage) ~ "N1",
      grepl("N2", Nstage) ~ "N2",
      grepl("N3", Nstage) ~ "N3",
      TRUE ~ "Nx"
    ),
    M_stage = dplyr::case_when(
      grepl("M0", Mstage) ~ "M0",
      grepl("M1", Mstage) ~ "M1",
      TRUE ~ "Mx"
    )
  )
## Recoding mydata$cd44_oran into mydata$CD44:
## scores 0 and 1 are "negative", scores 2 and 3 are "positive".
mydata$CD44 <- mydata$cd44_oran %>%
  recode(
    "0" = "negative",
    "1" = "negative",
    "2" = "positive",
    "3" = "positive"
  ) %>%
  factor()
## Recoding mydata$her2_skor into mydata$Her2: strip the leading "+" from the
## score labels and store the result as a factor ordered "0" to "3".
mydata$Her2 <- factor(
  recode(mydata$her2_skor, "+1" = "1", "+2" = "2", "+3" = "3"),
  levels = c("0", "1", "2", "3")
)
## Recoding mydata$msi into mydata$MMR: relabel "MSS" as "pMMR" and each
## "MSİ(...)" label as "dMMR(...)", keeping the bracketed part.
mydata$MMR <- mydata$msi %>%
  recode(
    "MSS" = "pMMR",
    "MSİ(PMS2,MLH1)" = "dMMR(PMS2,MLH1)",
    "MSİ(MSH2,MSH6)" = "dMMR(MSH2,MSH6)",
    "MSİ(PMS2)" = "dMMR(PMS2)"
  ) %>%
  factor()

## Recoding mydata$msi into mydata$MMR2: the same source column, with every
## "MSİ(...)" label collapsed to a single "dMMR" category.
mydata$MMR2 <- mydata$msi %>%
  recode(
    "MSS" = "pMMR",
    "MSİ(PMS2,MLH1)" = "dMMR",
    "MSİ(MSH2,MSH6)" = "dMMR",
    "MSİ(PMS2)" = "dMMR"
  ) %>%
  factor()
# Dichotomise the two PD-L1 columns (`t_pdl1`, `i_pdl1`) at cut-offs 1 and 5.
# Values below the cut-off get the "kucuk" label, values at or above it the
# "buyukesit" label; missing values stay NA.
mydata <- mydata %>%
  dplyr::mutate(
    TumorPDL1gr1 = dplyr::if_else(t_pdl1 < 1, "kucuk1", "buyukesit1"),
    TumorPDL1gr5 = dplyr::if_else(t_pdl1 < 5, "kucuk5", "buyukesit5"),
    inflPDL1gr1 = dplyr::if_else(i_pdl1 < 1, "kucuk1", "buyukesit1"),
    inflPDL1gr5 = dplyr::if_else(i_pdl1 < 5, "kucuk5", "buyukesit5")
  )
## Recoding mydata$lvi, mydata$pni and mydata$ln into mydata$LVI, mydata$PNI
## and mydata$LenfNoduMetastazi. All three share the same "var"/"yok" coding,
## so one helper is applied to each source column; the resulting factors have
## the levels ordered "Yok", "Var".
mydata[c("LVI", "PNI", "LenfNoduMetastazi")] <- unname(lapply(
  mydata[c("lvi", "pni", "ln")],
  function(column) {
    factor(
      recode(column, "var" = "Var", "yok" = "Yok"),
      levels = c("Yok", "Var")
    )
  }
))
# Convert the spreadsheet date serials stored in `olum_tarihi` to Date values.
# NOTE(review): assumes the column is character; entries that as.numeric()
# cannot parse then become NA (with a coercion warning) — confirm.
mydata$sontarih <- mydata$olum_tarihi %>%
  as.numeric() %>%
  janitor::excel_numeric_to_date()
# Outcome is "Alive" only where `olum_tarihi` holds the literal "yok";
# every other row, including rows where the field is missing, is "Dead".
mydata$Outcome <- ifelse(mydata$olum_tarihi %in% "yok", "Alive", "Dead")
# cat(names(mydata), sep = ",\n")

# Keep only the columns used downstream, in this order. `new = old` entries
# rename a column while selecting it; commented-out names are source columns
# that are deliberately left out.
mydata <- mydata %>%
  select(
    # sira_no,
    # no,
    # x3,
    # hasta_biyopsi_no,
    # cinsiyet,
    Cinsiyet,
    Yas = hasta_yasi,
    TumorYerlesimi,
    TumorCapi = tumor_capi,
    HistolojikAltTip,
    Lauren,
    Grade,
    TNM = patolojik_evre,
    Tstage,
    T_stage,
    Nstage,
    N_stage,
    Mstage,
    M_stage,
    CD44,
    Her2,
    MMR,
    MMR2,
    TumorPDL1gr1,
    TumorPDL1gr5,
    inflPDL1gr1,
    inflPDL1gr5,
    LVI,
    PNI,
    LenfNoduMetastazi,
    Outcome,
    # tumor_yerlesimi,
    # histolojik_alt_tip,
    # lauren_siniflamasi,
    # histolojik_derece,
    # cd44_oran,
    # cd44_intense,
    # her2_skor,
    # msi,
    # t_pdl1,
    # i_pdl1,
    # lvi,
    # pni,
    # ln,
    CerrahiTarih = cerrahi_tarih,
    # olum_tarihi,
    genel_sagkalim,
    SonTarih = sontarih
  )
iRenameColumn.R
iSelectColumn.R
# Standardise the column names, then rename the survey items.
mydata <- janitor::clean_names(mydata)

# cat(names(mydata), sep = ",\n")

# The renaming below is purely positional: the first 21 names are kept and
# the next 30 columns become Soru1 ... Soru30. Stop early if the data does
# not have exactly 21 + 30 columns, instead of mislabelling columns.
stopifnot(ncol(mydata) == 51L)
names(mydata) <- c(names(mydata)[1:21], paste0("Soru", 1:30))
library(arsenal)

# Build a one-variable arsenal table of `katilim_durumu` and print its summary.
tab1 <- tableby(~ katilim_durumu , data = mydata )
summary(tab1)
# Keep only the rows whose `katilim_durumu` equals the label below.
mydata <- filter(mydata, katilim_durumu == "katılmış ve tamamlamış")
# Optional inspection helpers (kept commented out):
# summarytools::view(summarytools::dfSummary(mydata))
# dplyr::glimpse(mydata)

# Print the distinct values found across all "Soru" columns, one per line:
# mydata %>%
#   select(starts_with("Soru")) %>%
#   pivot_longer(everything()) %>%
#   select(value) %>%
#   pull() %>%
#   unique() %>%
#   cat(sep = "\n")
## Recoding mydata$x3_yasiniz_nedir into mydata$YasGrup: a factor with the
## age bands listed in ascending order.
mydata$YasGrup <- factor(
  mydata$x3_yasiniz_nedir,
  levels = c("20-29", "30-39", "40-49", "50-59", "60-69", "70-79", "80-89")
)
## Recoding mydata$x4_cinsiyetiniz_nedir into mydata$Cinsiyet: rewrite the
## one label that contains a non-ASCII character and store as a factor.
mydata$Cinsiyet <- mydata$x4_cinsiyetiniz_nedir %>%
  recode("Kadın" = "Kadin") %>%
  factor()
## Recoding mydata$x5_kac_yildir_genel_cerrahi_uzmanisiniz into
## mydata$UzmanlikSuresi, a factor with the bands in ascending order.
## NOTE(review): "43739" looks like a spreadsheet date serial that a "10-19"
## answer was converted to on import — confirm against the raw data.
mydata$UzmanlikSuresi <- mydata$x5_kac_yildir_genel_cerrahi_uzmanisiniz %>%
  recode("43739" = "10-19") %>%
  factor(levels = c("0-9", "10-19", "20-29", "30-39", "40-49"))
## Recoding mydata$x6_unvaniniz_nedir into mydata$Unvan: a factor with the
## titles in the order listed below.
mydata$Unvan <- factor(
  mydata$x6_unvaniniz_nedir,
  levels = c("Op.Dr.", "Doktor Öğretim Üyesi", "Doç.Dr.", "Prof.Dr")
)
## Recoding mydata$x8_hangi_kurumda_calisiyorsunuz into mydata$Kurum:
## shorten the institution labels and fix the order of the factor levels.
mydata$Kurum <- mydata$x8_hangi_kurumda_calisiyorsunuz %>%
  recode(
    "Eğitim Araştırma Hastanesi" = "Eğitim Araştırma",
    "İlçe Devlet Hastanesi" = "İlçe Devlet",
    "Üniversite Hastanesi" = "Üniversite",
    "İl Devlet Hastanesi" = "İl Devlet",
    "Özel Hastane ve Kurumlar" = "Özel"
  ) %>%
  factor(
    levels = c(
      "Özel", "İlçe Devlet", "İl Devlet", "Eğitim Araştırma", "Üniversite"
    )
  )
# Item-name vectors for the 30 "Soru" columns, built from the item numbers.

# Items listed separately for scoring (all of them also belong to BS).
tersSorular <- paste0("Soru", c(1, 4, 15, 17, 29))

# The three ten-item subscales.
CSS <- paste0("Soru", c(3, 6, 12, 16, 18, 20, 22, 24, 27, 30))
BS <- paste0("Soru", c(1, 4, 8, 10, 15, 17, 19, 21, 26, 29))
STSS <- paste0("Soru", c(2, 5, 7, 9, 11, 13, 14, 23, 25, 28))
# Convert a vector of answer labels to scores 1-5. Several spelling variants
# of the same answer are mapped to the same score.
#
# x   : vector of answer labels.
# ... : ignored; accepted so the function can be used as a `.funs` argument.
recode_numberize <- function(x, ...) {
  dplyr::recode(
    x,
    "Hiçbir zaman" = 1,
    "Nadiren" = 2,
    "Bazı zamanlar" = 3,
    "Bazı zamanlarda" = 3,
    "Sık sık" = 4,
    "Sıkça" = 4,
    "Sıksık" = 4,
    "Çok sık" = 5,
    "Çoksık" = 5
  )
}

# Same labels with the scale reversed (1 <-> 5, 2 <-> 4, 3 unchanged).
#
# x   : vector of answer labels.
# ... : ignored; accepted so the function can be used as a `.funs` argument.
recode_numberize_ters <- function(x, ...) {
  dplyr::recode(
    x,
    "Hiçbir zaman" = 5,
    "Nadiren" = 4,
    "Bazı zamanlar" = 3,
    "Bazı zamanlarda" = 3,
    "Sık sık" = 2,
    "Sıkça" = 2,
    "Sıksık" = 2,
    "Çok sık" = 1,
    "Çoksık" = 1
  )
}

# Score every "Soru" column: the items named in `tersSorular` use the
# reversed mapping, all other items the direct one. `all_of()` makes it
# explicit that `tersSorular` is an external character vector of column
# names rather than a column of `mydata`.
mydata <- mydata %>%
  mutate_at(
    .vars = vars(starts_with("Soru"), -all_of(tersSorular)),
    .funs = recode_numberize
  ) %>%
  mutate_at(
    .vars = vars(all_of(tersSorular)),
    .funs = recode_numberize_ters
  )
# Row-wise subscale totals over the item columns named in CSS, BS and STSS.
mydata <- mydata %>%
  # Written with na.rm = FALSE, the total is not computed when an item is
  # missing:
  # mutate(
  #   CSS_total = rowSums(select(., CSS), na.rm = FALSE)
  # ) %>%
  mutate(
    CSS_total = rowSums(select(., CSS), na.rm = TRUE),
    BS_total = rowSums(select(., BS), na.rm = TRUE),
    STSS_total = rowSums(select(., STSS), na.rm = TRUE)
  ) %>%
  # With na.rm = TRUE a row whose items are all missing sums to 0; turn
  # every total of exactly 0 back into NA.
  naniar::replace_with_na_at(
    .vars = vars(ends_with("_total")),
    condition = ~ .x == 0
  )
Score groups: <= 22 Low; >= 23 & <= 41 Average; >= 42 High.
# For every column ending in "_total", add a "<name>_Gr" column holding the
# score group (<= 22 "Low", 23-41 "Average", >= 42 "High", otherwise NA),
# then turn every "_Gr" column into a factor with the levels in that order.
mydata <- mydata %>%
  mutate_at(
    vars(ends_with("_total")),
    list(
      Gr = ~ case_when(
        . <= 22 ~ "Low",
        . >= 23 & . <= 41 ~ "Average",
        . >= 42 ~ "High",
        TRUE ~ NA_character_
      )
    )
  ) %>%
  mutate_at(
    vars(ends_with("_Gr")),
    factor,
    levels = c("Low", "Average", "High")
  )

# Earlier per-column version, kept for reference:
# ## Reordering mydata$CSS_total_Gr
# mydata$CSS_total_Gr <- factor(mydata$CSS_total_Gr, )
#
# ## Reordering mydata$BS_total_Gr
# mydata$BS_total_Gr <- factor(mydata$BS_total_Gr, levels=c("Low", "Average", "High"))
#
# ## Reordering mydata$STSS_total_Gr
# mydata$STSS_total_Gr <- factor(mydata$STSS_total_Gr, levels=c("Low", "Average", "High"))
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.