View source: R/novelty_check.R
novelty_check | R Documentation |
This function checks novelty of a list of loci such as pQTLs against a published list. Both known_loci and query_loci have these variables: chr, pos, uniprot, rsid, prot.
novelty_check(
known_loci,
query_loci,
ldops = NULL,
flanking = 1e+06,
pop = "EUR",
verbose = TRUE
)
known_loci |
A data.frame of published loci. |
query_loci |
A data.frame of loci whose novelties are unclear. |
ldops |
arguments for ieugwasr::ld_matrix_local() |
flanking |
A flanking distance. |
pop |
The reference population as for ieugwasr::ld_matrix(). |
verbose |
A flag to show nonexistent variants. |
A data.frame containing nonnovel loci.
## Not run:
options(width=2000)
suppressMessages(require(dplyr))
suppressMessages(require(openxlsx))
# SCALLOP-INF list
METAL <- read.delim(file.path(find.package("pQTLtools"),"tests","INF1.METAL")) %>%
dplyr::left_join(gap.datasets::inf1[c("prot","gene")]) %>%
dplyr::mutate(prot=gene,prot_rsid=paste0(uniprot,"-",rsid),chr=Chromosome,pos=Position)
# UKB_PPP list
results <- "/rds/project/jmmh2/rds-jmmh2-results/public/proteomics"
url <- file.path(results,"UKB-PPP","doc","sun22.xlsx")
ST10 <- read.xlsx(url,"ST10",startRow=3) %>%
dplyr::mutate(uniprot=Target.UniProt,rsid=rsID,prot=Assay.Target) %>%
dplyr::mutate(prot_rsid=paste0(uniprot,"-",rsid))
sentinels <- dplyr::left_join(METAL,ST10,by="prot_rsid") %>%
dplyr::select(prot_rsid,cis.trans,rsID) %>%
dplyr::filter(!is.na(rsID))
inf1 <- c(with(gap.datasets::inf1,uniprot),with(METAL,uniprot)) %>%
unique()
overlap <- dplyr::filter(ST10,uniprot %in% inf1)
dim(overlap)
UKB_PPP <- dplyr::mutate(overlap,
chrpos=strsplit(overlap[["Variant.ID.(CHROM:GENPOS.(hg37):A0:A1:imp:v1)"]],":"),
chr=as.integer(unlist(lapply(chrpos,"[[",1))),
pos=as.integer(unlist(lapply(chrpos,"[[",2))),
chrpos=paste(chr,pos,sep=":"))
# ieugwasr LD reference which requires an up-to-date registration.
suppressMessages(require(GenomicRanges))
b <- novelty_check(UKB_PPP,METAL)
replication <- dplyr::filter(b,r2>=0.8)
INF <- "/rds/project/jmmh2/rds-jmmh2-projects/olink_proteomics/scallop/INF/"
# write.table(replication,file=file.path(INF,"work","UKB-PPP.txt"),
# row.names=FALSE,quote=FALSE,sep="\t")
replication <- read.delim(file.path(find.package("pQTLtools"),"tests","UKB-PPP.txt")) %>%
dplyr::select(known.seqnames,known.rsid,query.rsid,query.prot)
variant_list <- unique(c(dplyr::pull(replication,known.rsid),
dplyr::pull(replication,query.rsid)))
load(file.path(find.package("pQTLtools"),"tests","novel_data.rda"))
prot_rsid <- with(novel_data,paste0(prot,"-",rsid))
prot_rsid_repl <- with(replication,paste0(query.prot,"-",query.rsid))
left <- setdiff(prot_rsid,prot_rsid_repl)
# local LD reference panel by chromosome
# r2 <- LDlinkR::LDmatrix(variant_list,pop="CEU",token=Sys.getenv("LDLINK_TOKEN"))
plink <- "/rds/user/jhz22/hpc-work/bin/plink"
b <- list()
for(i in unique(dplyr::pull(METAL,Chromosome)))
{
u <- dplyr::filter(UKB_PPP,chr %in% i) %>%
dplyr::select(chr,pos,uniprot,rsid,prot)
m <- dplyr::filter(METAL,Chromosome %in% i) %>%
dplyr::select(chr,pos,uniprot,rsid,prot)
bfile <- file.path(INF,"INTERVAL","per_chr",paste0("chr",i))
b[[i]] <- novelty_check(u,m,ldops=list(bfile=bfile,plink=plink))
}
replication2 <- dplyr::filter(bind_rows(b), r2>=0.8)
prot_rsid <- with(novel_data %>%
dplyr::left_join(gap.datasets::inf1[c("prot","gene")]),paste0(gene,"-",rsid))
prot_rsid_repl <- with(replication2,paste0(query.prot,"-",query.rsid))
novel <- setdiff(prot_rsid,prot_rsid_repl)
## End(Not run)
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.