inst/data-raw/process/Core_HLA/process.R

library(data.table)

# HLA: read the cleaned typing table and keep only the case identifier and
# the listed allele columns, in this order.
keep_cols <- c(
  "nPODCaseID",
  "a_1", "a_2", "b_1", "b_2", "c_1", "c_2",
  "DRB1_1", "DRB1_2", "DQA1_1", "DQA1_2", "DQB1_1", "DQB1_2",
  "DPA1_1", "DPA1_2", "DPB1_1", "DPB1_2"
)
HLA <- fread("HiResHLAclean.tsv")
HLA <- HLA[, keep_cols, with = FALSE]

# Rename: first column becomes "ID"; the rest are upper-cased with "_"
# replaced by "." (e.g. "a_1" -> "A.1").
new_names <- c("ID", toupper(chartr("_", ".", keep_cols[-1])))
setnames(HLA, new_names)

# Store the case identifier as a number.
HLA[, ID := as.numeric(ID)]
# Some mistakes and inconsistencies in the original file,
# e.g. these entries: "16:XX" "Second s" "11:001" "11:1".
# Each known bad entry (name) is mapped to its replacement (value).
hla_fixes <- c(
  "11:001" = "11:01",
  "11:1" = "11:01",
  "Second s" = "",
  "16:XX" = "16"
)

# Recode one column at a time so that only cells actually holding a bad entry
# are changed. Assigning through HLA[rows, cols] <- value with the row and
# column vectors returned by which(arr.ind = TRUE) would instead overwrite
# every combination of those rows and columns.
for (col in names(HLA)) {
  values <- HLA[[col]]
  # The bad entries are strings, so non-character columns (e.g. the numeric
  # ID) cannot contain them and are left untouched.
  if (!is.character(values)) next
  hit <- match(values, names(hla_fixes))
  bad_rows <- which(!is.na(hit))
  # Nothing to do for this column when no bad entry is present.
  if (length(bad_rows) > 0L) {
    set(HLA, i = bad_rows, j = col, value = unname(hla_fixes[hit[bad_rows]]))
  }
}

# Save the cleaned table as a tab-separated file without row names.
out_path <- "Core/HiResHLAclean_ref.tsv"
write.table(
  HLA,
  file = out_path,
  sep = "\t",
  row.names = FALSE
)
avucoh/nPOD documentation built on April 1, 2020, 5:24 p.m.