# inst/data-raw/process/PMID23064357_Richardson-2012/process.R

library(rvest)
library(data.table)

# Fetch the article's Table 1 page, keep the first table parsed from it, and
# replace the scraped headers with short column names (assigned by position).
tab1 <- setNames(
  html_table(
    read_html("https://link.springer.com/article/10.1007/s00125-012-2745-4/tables/1")
  )[[1]],
  c("ID", "age", "duration", "region", "VP1", "Insulin")
)

# Parse the leading integer of each string (e.g. "12 ..." -> 12).
# Entries that do not start with digits (including NA) yield NA without a
# coercion warning. Unlike a fixed two-character substr(), this also handles
# counts of any width and a digit followed directly by a non-space character.
leading_count <- function(x) {
  digits <- sub("^\\s*([0-9]+).*$", "\\1", x)
  digits[!grepl("^[0-9]+$", digits)] <- NA_character_
  as.numeric(digits)
}

# Convert to a data.table so the derived columns can be added by reference.
tab1 <- as.data.table(tab1)

# Region = the text between "Pan" and the last following "-", lower-cased.
# regmatches() drops elements that do not match, so the result is written into
# a full-length vector to keep rows aligned; non-matching rows become NA.
tab1[, region := {
  hit <- regexpr("(?<=Pan)(.*)(?=-)", region, perl = TRUE)
  found <- !is.na(hit) & hit > -1L
  parsed <- rep(NA_character_, length(region))
  parsed[found] <- tolower(regmatches(region, hit))
  parsed
}]

# VP1.pos is 1 when the VP1 status is NOT reported as "Negative", 0 otherwise.
# (The previous code flagged the "Negative" rows as positive.)
tab1[, VP1.pos := as.integer(!grepl("Negative", VP1, fixed = TRUE))]

# The VP1 status also gives the count of ICIs with VP1 status, but that has to
# be represented proportionally to the total number of ICIs examined.
# Rows whose VP1 entry has no leading count (e.g. "Negative") stay NA.
tab1[, VP1.ICI := leading_count(VP1)]
tab1[, ICI.n := leading_count(Insulin)]
tab1[, VP1.ICI.prct := round((VP1.ICI / ICI.n) * 100)]

# The study did not set out to stratify VP1 by pancreas region (had it done so,
# the authors would have tried to obtain all blocks for each donor), so the
# region-stratified data are left out and only donor-level columns are kept.
dataset <- tab1[, c("ID", "VP1.pos", "ICI.n", "VP1.ICI.prct"), with = FALSE]

# Write a tab-separated file with a header row, no row names and no quoting.
write.table(
  dataset,
  file = "PMID23064357_1_Richardson-2012.tsv",
  sep = "\t",
  quote = FALSE,
  row.names = FALSE
)
# avucoh/nPOD documentation built on April 1, 2020, 5:24 p.m.