# inst/data-raw/process/PMID26581594_CampbellThompson-2016/process.R

library(rvest)

# Scrape Table 1 of PMC4764143 (PMID 26581594), keep the per-case islet counts,
# derive percentage columns from them and export the result as dataset 1.
# NOTE(review): as_tibble(), select(), filter(), mutate_all() and transmute()
# are dplyr/tibble verbs, but only rvest is attached in this file -- presumably
# dplyr is loaded by whatever sources this script; confirm.
tab1 <- read_html("https://www.ncbi.nlm.nih.gov/pmc/articles/PMC4764143/table/T1/") %>%
  html_table()
# html_table() yields one table per <table> node on the page; the second
# element is the one used here.
tab1 <- tab1[[2]]
tab1 <- tab1[, -1] %>% # drop the first column
  as_tibble() %>%
  select(ID = `Case ID`,
         sections.n = `Sections (n)`,
         InsPos.CD3Neg = `Ins+ CD3`,
         InsPos.CD3Pos = `Ins+ CD3+`,
         InsNeg.CD3Pos = `InsCD3+`,
         InsNeg.CD3Neg = `InsCD3`,
         islets.n = `Total islets*`,
         Insulitis.prct = `Insulitis frequency (%)†`) %>%
  # Keep only rows whose case ID starts with "6".
  filter(grepl("^6", ID)) %>%
  # Remove commas (presumably thousands separators), then coerce every column,
  # ID included, to numeric.
  mutate_all(function(x) as.numeric(gsub(",", "", x)))

# From paper: †Insulitis frequency (%) calculated as sum insulitic islets: (Ins+ CD3+ and InsCD3+)/total islets.
# It is also desirable to calculate the analogous percentages from the raw
# counts, i.e. InsPos.CD3Neg.prct, InsPos.CD3Pos.prct, InsNeg.CD3Pos.prct and
# InsNeg.CD3Neg.prct (each count divided by the total number of islets).
dataset <- tab1 %>%
  transmute(ID = ID,
            Insulitis.prct = Insulitis.prct,
            InsPos.CD3Neg.prct = InsPos.CD3Neg / islets.n * 100,
            InsPos.CD3Pos.prct = InsPos.CD3Pos / islets.n * 100,
            InsNeg.CD3Pos.prct = InsNeg.CD3Pos / islets.n * 100,
            InsNeg.CD3Neg.prct = InsNeg.CD3Neg / islets.n * 100)

# Spell out FALSE: the shorthand `F` is an ordinary variable that can be
# reassigned, which would silently change the output format.
write.table(dataset, "PMID26581594_1_CampbellThompson-2016.tsv",
            sep = "\t", row.names = FALSE, quote = FALSE)

# Export insulin measurements as a separate dataset
# This is a strange circumstance where the actual data values can be found in another later paper
# See note for https://www.ncbi.nlm.nih.gov/pmc/articles/PMC6309032/table/table1-0022155418778546/
# -- "Insulin fractional area and mass were previously reported (in https://www.ncbi.nlm.nih.gov/pubmed/26581594)"

# Scrape Table 1 of PMC6309032, keep the per-case insulin area and insulin mass
# columns, and export them as dataset 2.
Ins.data <- read_html("https://www.ncbi.nlm.nih.gov/pmc/articles/PMC6309032/table/table1-0022155418778546/") %>%
  html_table()
# html_table() yields one table per <table> node on the page; the second
# element is the one used here. Only the three columns of interest are kept.
Ins.data <- Ins.data[[2]][, c("Case ID", "Insulin Area (%)c", "Insulin Mass (mg)c")]
Ins.data <- Ins.data %>%
  as_tibble() %>%
  select(ID = `Case ID`,
         Ins.prct = `Insulin Area (%)c`,
         Ins.mass = `Insulin Mass (mg)c`) %>%
  # Keep only rows whose case ID starts with "6".
  filter(grepl("^6", ID)) %>%
  # Coerce every column, ID included, to numeric.
  mutate_all(as.numeric)

# Write the insulin table itself. The previous code passed `dataset` (the
# Table 1 percentages) here, so the second TSV duplicated the first and the
# insulin measurements were never exported.
write.table(Ins.data, "PMID26581594_2_CampbellThompson-2016.tsv",
            sep = "\t", row.names = FALSE, quote = FALSE)
# avucoh/nPOD documentation built on April 1, 2020, 5:24 p.m.