novelty_check: Locus novelty check

View source: R/novelty_check.R

novelty_checkR Documentation

Locus novelty check

Description

This function checks novelty of a list of loci such as pQTLs against a published list. Both known_loci and query_loci have these variables: chr, pos, uniprot, rsid, prot.

Usage

novelty_check(
  known_loci,
  query_loci,
  ldops = NULL,
  flanking = 1e+06,
  pop = "EUR",
  verbose = TRUE
)

Arguments

known_loci

A data.frame of published loci.

query_loci

A data.frame of loci whose novelties are unclear.

ldops

arguments for ieugwasr::ld_matrix_local()

flanking

A flanking distance.

pop

The reference population as for ieugwasr::ld_matrix().

verbose

A flag to show nonexistent variants.

Value

A data.frame containing nonnovel loci.

Examples

## Not run: 
options(width=2000)
suppressMessages(require(dplyr))
suppressMessages(require(openxlsx))
# SCALLOP-INF list
METAL <- read.delim(file.path(find.package("pQTLtools"),"tests","INF1.METAL")) %>%
         dplyr::left_join(gap.datasets::inf1[c("prot","gene")]) %>%
         dplyr::mutate(prot=gene,prot_rsid=paste0(uniprot,"-",rsid),chr=Chromosome,pos=Position)
# UKB_PPP list
results <- "/rds/project/jmmh2/rds-jmmh2-results/public/proteomics"
url <- file.path(results,"UKB-PPP","doc","sun22.xlsx")
ST10 <- read.xlsx(url,"ST10",startRow=3) %>%
        dplyr::mutate(uniprot=Target.UniProt,rsid=rsID,prot=Assay.Target) %>%
        dplyr::mutate(prot_rsid=paste0(uniprot,"-",rsid))
sentinels <- dplyr::left_join(METAL,ST10,by="prot_rsid") %>%
             dplyr::select(prot_rsid,cis.trans,rsID) %>%
             dplyr::filter(!is.na(rsID))
inf1 <- c(with(gap.datasets::inf1,uniprot),with(METAL,uniprot)) %>%
        unique()
overlap <- dplyr::filter(ST10,uniprot %in% inf1)
dim(overlap)
UKB_PPP <- dplyr::mutate(overlap,
           chrpos=strsplit(overlap[["Variant.ID.(CHROM:GENPOS.(hg37):A0:A1:imp:v1)"]],":"),
           chr=as.integer(unlist(lapply(chrpos,"[[",1))),
           pos=as.integer(unlist(lapply(chrpos,"[[",2))),
           chrpos=paste(chr,pos,sep=":"))
# ieugwasr LD reference which requires an up-to-date registration.
suppressMessages(require(GenomicRanges))
b <- novelty_check(UKB_PPP,METAL)
replication <- dplyr::filter(b,r2>=0.8)
INF <- "/rds/project/jmmh2/rds-jmmh2-projects/olink_proteomics/scallop/INF/"
# write.table(replication,file=file.path(INF,"work","UKB-PPP.txt"),
#             row.names=FALSE,quote=FALSE,sep="\t")
replication <- read.delim(file.path(find.package("pQTLtools"),"tests","UKB-PPP.txt")) %>%
               dplyr::select(known.seqnames,known.rsid,query.rsid,query.prot)
variant_list <- unique(c(dplyr::pull(replication,known.rsid),
                         dplyr::pull(replication,query.rsid)))
load(file.path(find.package("pQTLtools"),"tests","novel_data.rda"))
prot_rsid <- with(novel_data,paste0(prot,"-",rsid))
prot_rsid_repl <- with(replication,paste0(query.prot,"-",query.rsid))
left <- setdiff(prot_rsid,prot_rsid_repl)
# local LD reference panel by chromosome
# r2 <- LDlinkR::LDmatrix(variant_list,pop="CEU",token=Sys.getenv("LDLINK_TOKEN"))
plink <- "/rds/user/jhz22/hpc-work/bin/plink"
b <- list()
for(i in unique(dplyr::pull(METAL,Chromosome)))
{
   u <- dplyr::filter(UKB_PPP,chr %in% i) %>%
        dplyr::select(chr,pos,uniprot,rsid,prot)
   m <- dplyr::filter(METAL,Chromosome %in% i) %>%
        dplyr::select(chr,pos,uniprot,rsid,prot)
   bfile <- file.path(INF,"INTERVAL","per_chr",paste0("chr",i))
   b[[i]] <- novelty_check(u,m,ldops=list(bfile=bfile,plink=plink))
}
replication2 <- dplyr::filter(bind_rows(b), r2>=0.8)
prot_rsid <- with(novel_data %>%
             dplyr::left_join(gap.datasets::inf1[c("prot","gene")]),paste0(gene,"-",rsid))
prot_rsid_repl <- with(replication2,paste0(query.prot,"-",query.rsid))
novel <- setdiff(prot_rsid,prot_rsid_repl)

## End(Not run)

jinghuazhao/pQTLtools documentation built on Dec. 16, 2024, 3:44 p.m.