# inst/data-raw/process/PMID28878130_Levet-2017/process.R

library(readxl)
library(tidyr)
library(dplyr)

# Import the donor-level measurements from the fixed cell range A2:AK79 of
# the compiled workbook.
dat <- read_excel(
  path = "18-07-03 compilation CD3 CD68 Env03 nPOD.xlsx",
  range = "A2:AK79"
)

# Show the imported column headers so they can be compared with the names
# selected further down.
names(dat)
# Keep only the columns of interest and give them short names. The headers
# are matched exactly as they appear in the sheet, including the embedded
# line breaks ("\r\n") inside the multi-line "Mean" headers.
dat <- select(
  dat,
  ID = "nPOD case ID",
  "Tissue",
  # Env03 positive area
  Env03.prct = "% Env03 positive area in pancreas area",
  Env03.prct.mean =
    "% Env03 positive area in pancreas area\r\nMean for each donor",
  # CD3 and CD68 in islets
  CD3.cells.prct.islet = "% of CD3+ cells in Islets",
  CD3.cells.prct.islet.mean = "% of CD3+ cells in Islets\r\n\r\nMean Donors",
  CD3.cells.islet = "Number of CD3+ cells in Islets / mm2",
  CD3.cells.islet.mean =
    "Number of CD3+ cells in Islets / mm2\r\nMean Donors",
  CD68.cells.prct.islet = "% of CD68+ cells in Islets",
  CD68.cells.prct.islet.mean =
    "% of CD68+ cells in Islets\r\n\r\nMean Donors",
  CD68.cells.islet = "Number of CD68+ cells in Islets / mm2",
  CD68.cells.islet.mean =
    "Number of CD68+ cells in Islets / mm2\r\nMean Donors",
  # CD3 and CD68 in exocrine tissue
  CD3.cells.prct.exocrine = "% of CD3+ cells in Exocrine",
  CD3.cells.prct.exocrine.mean =
    "% of CD3+ cells in Exocrine\r\n\r\nMean Donors",
  CD3.cells.exocrine = "Number of CD3+ cells in exocrine / mm2",
  CD3.cells.exocrine.mean =
    "Number of CD3+ cells in exocrine / mm2\r\nMean Donors",
  CD68.cells.prct.exocrine = "% of CD68+ cells in Exocrine",
  CD68.cells.prct.exocrine.mean =
    "% of CD68+ cells in Exocrine\r\n\r\nMean Donors",
  CD68.cells.exocrine = "Number of CD68+ cells in exocrine / mm2",
  CD68.cells.exocrine.mean =
    "Number of CD68+ cells in exocrine / mm2\r\nMean Donors"
)

# Normalise the key columns: keep the first four characters of the case ID,
# and strip the "Pancreas " prefix from Tissue before lower-casing it.
dat$ID <- substr(dat$ID, 1, 4)
dat$Tissue <- tolower(gsub("Pancreas ", "", dat$Tissue))

# Keep the part-stratified (non-mean) measurements and spread them to one row
# per donor ID: each measurement column becomes `<measurement>.<tissue>`, with
# the suffix taken from the values of `Tissue`.
dat.part <- dat %>%
  # Drop the mean columns by name, so the selection does not depend on column
  # positions computed outside the pipeline.
  select(-contains("mean", ignore.case = FALSE)) %>%
  pivot_wider(
    names_from = Tissue,
    values_from = -c(ID, Tissue),
    names_sep = "."
  )

# Looking at the original spreadsheet, a donor's mean values can sit on either
# the first or the second row for that donor. Collapse each donor to a single
# row holding the first non-missing value of every mean column, then attach
# the part-stratified measurements.
dat <- dat %>%
  select(ID, contains("mean", ignore.case = FALSE)) %>%
  group_by(ID) %>%
  # `x[!is.na(x)][1]` always has length 1: it gives an NA of the column's own
  # type when a donor has no value, and still returns a single value when the
  # same mean is present on several rows of one donor.
  summarize_all(function(x) x[!is.na(x)][1]) %>%
  full_join(dat.part, by = "ID")
 
# Write the combined per-donor table as tab-separated text, without quoting
# strings and without a row-name column.
write.table(
  dat,
  file = "PMID28878130_1_Levet-2017.tsv",
  sep = "\t",
  quote = FALSE,
  row.names = FALSE
)
  
  

                  
# avucoh/nPOD documentation built on April 1, 2020, 5:24 p.m.