In Oromendia/oromendia: Clara Oromendia's suite of R functions

knitr::opts_chunk$set(echo = TRUE,eval = FALSE)

This is a collection of snippets I find myself repeating or searching. Change July 27.

Linear mixed models

Code from Patrick Heagerty's class

summary(lme(like_test~uip_dx, method="ML",random=reStruct(~1|id, pdClass="pdSymm", REML=F)))

Remove underscores from column names

colnames(dat) = gsub("_",".",colnames(dat))
colnames(rawDat) = tolower(colnames(rawDat))

Export R dataset for SAS

library(foreign)
write.foreign(df=dat, datafile=paste0(dir,"/dat_03_02datafile.csv"), 
    codefile=paste0(dir,"/dat_03_02codeFile.sas"), package="SAS")

SAS code hints to save labels

proc contents data=TMP1.EXACTBASELINE_PEX11_160307 out=contents noprint;
run;

libname a "O:/Divisional/BIO/mco2004/DoM/Spiromics - EXACT";
data a.labels(keep=NAME LABEL);
    set contents; 
run;

Stop code until you click "continue"

library(tcltk)
waitForClick <- function() {
    tt <- tktoplevel()
    tkpack( tkbutton(tt, text='     Yes - SAS code has run     ', command=function()tkdestroy(tt)),
        side='bottom')
    tkbind(tt,'<Key>', function()tkdestroy(tt) )
    tkwait.window(tt)
}
waitForClick()

Load Times New Roman Font and use in plotting

Note that this is much easier with ggplot.

install.packages("extrafont")
library(extrafont)
# font_import() (takes a long time)
loadfonts(device="win")  #Register fonts for Windows bitmap output
fonts() # list of fonts 
plot(1,1,family="Times New Roman")

Overall plot title

par(mar=c(0,0,0,0),xpd=T)
plot(0,type="n",bty="n",axes=F,ylab="",xlab="")
text(-.8,"MEP Increase from Baseline Under PAS by Dose",cex=2,font=2,family="Times New Roman")

Overall legend

par(mar=c(0,0,0,0),xpd=T,family="Times New Roman")
plot(0,type="n",bty="n",axes=F,ylab="",xlab="")
legend("top",col=2,lwd=4,c("Mean change from baseline"),bty="n",cex=1.5)

Print long table into markdown

kable(., format="html") %>%
  kable_styling(bootstrap_options = c("striped", "hover", "condensed", "responsive"), 
                full_width = F, position = "center") %>% 
  scroll_box(height = "500px")

Plot of regression coefficients

m1 = glm(onset ~ warl + gdpenl + lpopl1 + lmtnest + ncontig + Oil + nwstate + instab + polity2l + ethfrac + relfrac, data = df, family = "binomial")
library(coefplot)
coefplot(m1)

Markdown barplot with percents and counts

dat %>% 
  filter(!is.na(adt_s)) %>% 
  ggplot(aes(x=adt_s,  group=SPOPmut_m)) + 
  geom_bar(aes(y = ..prop.., fill = factor(..x..)), stat="count") +
  geom_text(aes(label = ..count.., y= ..prop..), stat= "count", vjust = -.5) +
  geom_text(aes( label = scales::percent(..prop..), y= ..prop.. ), stat= "count", vjust = 1.5) +
  labs(y = "Percent", fill="adt_s") +
  facet_grid(~SPOPmut_m) +
  scale_y_continuous(labels=percent) +
  labs(title="adt_s by SPOP mutation status",subtitle=paste0("Chisq p=",round(chisq.test(dat$SPOPmut_m,dat$adt_s)$p.val,3))) +
  guides(fill=FALSE)

Lasagna plot

# library(devtools); install_github("swihart/lasagnar")
library(lasagnar)
longDat %>%
  right_join(select(dat_xWeeks,subjid,longestEvent_28d56d)) %>%
  # filter(subjid %in% dat_xWeeks[["subjid"]]) %>%
  filter(studyday > 0 & studyday <= studyDays_here) %>%
  select(subjid,exacttot,studyday) %>%
  spread(studyday,exacttot) %>%
  rename_all(,.funs= function(x) paste0("time",x)) %>%
  # order by initial exact value
  arrange(time1,time2,time3,time4,time5) %>%
  as.data.frame() -> df

rownames(df) <- as.numeric(as.factor(df$timesubjid))
df$timesubjid = NULL
matrix = as.matrix(df)

lasagna(matrix)

Dataset with pairwise p values to plot

paris <- combn(levels(dat_all$GOLD_arabicNum_GOLDold), 2, simplify = FALSE)
pairs_pval <- tidy(with(dat_all[ dat_all$cohort %in% c("1st","2nd"),], pairwise.wilcox.test(mtDNA_log10, GOLD_arabicNum_GOLDold, p.adjust.method = "BH")))
dat_pairs <- do.call(rbind.data.frame, paris)
colnames(dat_pairs) <- colnames(pairs_pval)[-3]
pv_final <- merge(dat_pairs, pairs_pval, by.x = c("group2", "group1"), by.y = c("group1", "group2"))
pv_final <- pv_final[order(pv_final$group1), ] 
pv_final$map_signif <- ifelse(pv_final$p.value > 0.05, "", ifelse(pv_final$p.value > 0.01,"*", "**"))
pv_final$p.value_f <- format_pval(pv_final$p.value,equal = "")

dat_all %>%
  filter(cohort %in% c("1st","2nd")) %>%
  ggplot(aes(GOLD_arabicNum_GOLDold,mtDNA_log10,fill=GOLD_arabicNum_GOLDold)) + 
  geom_boxplot(outlier.colour = NA) +  
  geom_jitter(width = 0.1) + ggtitle("mtDNA by GOLD") + 
  stat_compare_means(label.y=9.6) +
  guides(fill=FALSE) + 
  theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
  labs(title="mtDNA by GOLD in 1st and 2nd cohorts",y="Log 10 mtDNA",x="") +
  geom_signif(comparisons=paris,textsize = 3.5, vjust = 0.1, step_increase=0.15,
            annotation= pv_final$p.value_f)

REDCap to tibble using tidyverse

read_csv(here("Docs_2018_0403/ClinicalGenomicsOfCa_DATA_LABELS_2018-04-03_1419.csv")) %>% 
  clean_names() %>% 
  mutate_all(funs(checkedToYes)) %>% 
  mutate_all(funs(uncheckedToNo)) %>% 
  # remove irrelevant variables
  select(-contains("complete")) 
  filter(repeat_instrument == "Biopsies") %>% 
  remove_empty_cols() # from janitor package

REDCap to data frame using base R

# Format nicely categorical variables
makeCat <- function(varNm){
  cols = grep(varNm,colnames(wideDat))
    cats = gsub(".","",gsub(paste0(varNm,"..choice."),"",colnames(wideDat)[cols]),fixed=T)
    all = NA
    for (c in 1:length(cats)) {
      all[wideDat[,cols[c]] == "Checked"] = paste(all[wideDat[,cols[c]] == "Checked"],cats[c])
    }
    all = gsub("NA ","",all)
    return(all)
}
# Add more as needed for analysis
varsToCondense = c("Gastrointestinal.symptoms")
dat = subset(wideDat, select = c(colnames(wideDat)[-grep("choice..",colnames(wideDat))]))
dat = cbind(wideDat,mapply(makeCat,varsToCondense)) # keep multiple columns as well
dim(dat)

# Change all checked/unchecked columns to yes/no
for(col in 1:ncol(dat)) {
  if(length(table(dat[,col]))>1 & levels(as.factor(dat[,col]))[1] =="Checked"){
    levels(dat[,col]) = c("Yes","No")
  }
}
for(col in 1:ncol(dat)) {
  if(length(table(dat[,col]))==1 & !is.na(names(table(dat[,col]))[1]) & names(table(dat[,col]))[1] == "Unchecked") {
    dat[,col] = "No"
  }
}

R to Excel

I use this to create a sheet for each table and figure for a manucript

# Create Excel of select results
#https://trinkerrstuff.wordpress.com/2018/02/14/easily-make-multi-tabbed-xlsx-files-with-openxlsx/
library(openxlsx) 
filename <- paste0("manuscriptFigs_",format(Sys.Date(),"%Y_%m_%d"),".xlsx")
wb <- createWorkbook(type="xlsx")`

Oromendia/oromendia documentation built on April 6, 2021, 6:54 a.m.

rdrr.io home R language documentation Run R code online

CRAN packages Bioconductor packages R-Forge packages GitHub packages

Note that we can't provide technical support on individual packages. You should contact the package authors for that.

Oromendia/oromendia
Clara Oromendia's suite of R functions

In Oromendia/oromendia: Clara Oromendia's suite of R functions

Linear mixed models

Remove underscores from column names

Export R dataset for SAS

SAS code hints to save labels

Stop code until you click "continue"

Load Times New Roman Font and use in plotting

Overall plot title

Overall legend

Print long table into markdown

Plot of regression coefficients

Markdown barplot with percents and counts

Lasagna plot

Dataset with pairwise p values to plot

REDCap to tibble using tidyverse

REDCap to data frame using base R

R to Excel

R Package Documentation

Browse R Packages

We want your feedback!

Oromendia/oromendia Clara Oromendia's suite of R functions

In Oromendia/oromendia: Clara Oromendia's suite of R functions

Linear mixed models

Remove underscores from column names

Export R dataset for SAS

SAS code hints to save labels

Stop code until you click "continue"

Load Times New Roman Font and use in plotting

Overall plot title

Overall legend

Print long table into markdown

Plot of regression coefficients

Markdown barplot with percents and counts

Lasagna plot

Dataset with pairwise p values to plot

REDCap to tibble using tidyverse

REDCap to data frame using base R

R to Excel

R Package Documentation

Browse R Packages

We want your feedback!

Oromendia/oromendia
Clara Oromendia's suite of R functions