novelty_check | R Documentation |
This function checks novelty of a list of loci such as pQTLs against a published list. Both known_loci and query_loci have these variables: chr, pos, uniprot, rsid, prot.
novelty_check(
known_loci,
query_loci,
flanking = 1e+06,
pop = "EUR",
verbose = TRUE
)
known_loci |
A data.frame of published loci. |
query_loci |
A data.frame of loci whose novelties are unclear. |
flanking |
A flanking distance. |
pop |
The reference population as for ieugwasr::ld_matrix(). |
verbose |
A flag to show nonexistent variants. |
A data.frame containing nonnovel loci.
## Not run:
options(width=2000)
suppressMessages(require(dplyr))
# SCALLOP-INF list
METAL <- read.delim(file.path(find.package("pQTLtools"),"tests","INF1.METAL")) %>%
mutate(prot_rsid=paste0(uniprot,"-",rsid),pos=Position)
# UKB_PPP list
require(openxlsx)
url <- "~/rds/results/public/proteomics/UKB-PPP/doc/sun22.xlsx"
ST10 <- read.xlsx(url,"ST10",startRow=3) %>%
mutate(uniprot=Target.UniProt,rsid=rsID,prot=Assay.Target) %>%
mutate(prot_rsid=paste0(uniprot,"-",rsid))
sentinels <- left_join(METAL,ST10,by="prot_rsid") %>%
select(prot_rsid,cis.trans,rsID) %>%
filter(!is.na(rsID))
inf1 <- c(with(gap.datasets::inf1,uniprot),with(METAL,uniprot)) %>%
unique()
overlap <- filter(ST10,uniprot %in% inf1)
dim(overlap)
UKB_PPP <- mutate(overlap,
chrpos=strsplit(overlap[["Variant.ID.(CHROM:GENPOS.(hg37):A0:A1:imp:v1)"]],":"),
chr=lapply(chrpos,"[[",1),
pos=lapply(chrpos,"[[",2),
chrpos=paste(chr,pos,sep=":"))
suppressMessages(require(GenomicRanges))
b <- novelty_check(UKB_PPP,METAL)
replication <- filter(b,r2>=0.8)
# Mutual uses of pQTLtools/SCALLOP-INF
write.table(replication,file="~/INF/work/UKB-PPP.txt",row.names=FALSE,quote=FALSE,sep="\t")
load(file.path(find.package("pQTLtools"),"tests","novel_data.rda"))
prot_rsid <- with(novel_data,paste0(prot,"-",rsid))
prot_rsid_repl <- with(replication,paste0(query.prot,"-",query.rsid))
left <- setdiff(prot_rsid,prot_rsid_repl)
# local LD reference panel,
# https://raw.githubusercontent.com/jinghuazhao/INF/master/rsid/UKB-PPP.sh:
variant_list <- b[c("UKB.seqnames","UKB.rsid","SCALLOP.rsid")]
for (i in 1:nrow(variant_list))
{
z <- variant_list[i,]
r <- ieugwasr::ld_matrix_local(z[c("known.rsid","query.rsid")],with_alleles=TRUE,
bfile=file.path(INF,"INTERVAL","per_chr",paste0("interval.imputed.olink.chr_",z$UKB.seqnames)),
plink_bin="/rds/user/jhz22/hpc-work/bin/plink")
r <- ifelse(nrow(r)==2,r[1,2],r)
variant_list[i,"r"] <- round(r,3)
}
# LDlink:
# token <- Sys.getenv("LDLINK_TOKEN")
# r2 <- LDlinkR::LDmatrix(variant_list,pop="CEU",token=token)
## End(Not run)
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.