novelty_check: Locus novelty check

View source: R/utils.R

novelty_checkR Documentation

Locus novelty check

Description

This function checks novelty of a list of loci such as pQTLs against a published list. Both known_loci and query_loci have these variables: chr, pos, uniprot, rsid, prot.

Usage

novelty_check(
  known_loci,
  query_loci,
  flanking = 1e+06,
  pop = "EUR",
  verbose = TRUE
)

Arguments

known_loci

A data.frame of published loci.

query_loci

A data.frame of loci whose novelties are unclear.

flanking

A flanking distance.

pop

The reference population as for ieugwasr::ld_matrix().

verbose

A flag to show nonexistent variants.

Value

A data.frame containing nonnovel loci.

Examples

## Not run:  
 options(width=2000)
 suppressMessages(require(dplyr))
 # SCALLOP-INF list
 METAL <- read.delim(file.path(find.package("pQTLtools"),"tests","INF1.METAL")) %>%
          mutate(prot_rsid=paste0(uniprot,"-",rsid),pos=Position)
 # UKB_PPP list
 require(openxlsx)
 url <- "~/rds/results/public/proteomics/UKB-PPP/doc/sun22.xlsx"
 ST10 <- read.xlsx(url,"ST10",startRow=3) %>%
         mutate(uniprot=Target.UniProt,rsid=rsID,prot=Assay.Target) %>%
         mutate(prot_rsid=paste0(uniprot,"-",rsid))
 sentinels <- left_join(METAL,ST10,by="prot_rsid") %>%
              select(prot_rsid,cis.trans,rsID) %>%
              filter(!is.na(rsID))
 inf1 <- c(with(gap.datasets::inf1,uniprot),with(METAL,uniprot)) %>%
         unique()
 overlap <- filter(ST10,uniprot %in% inf1)
 dim(overlap)
 UKB_PPP <- mutate(overlap,
            chrpos=strsplit(overlap[["Variant.ID.(CHROM:GENPOS.(hg37):A0:A1:imp:v1)"]],":"),
            chr=lapply(chrpos,"[[",1),
            pos=lapply(chrpos,"[[",2),
            chrpos=paste(chr,pos,sep=":"))
 suppressMessages(require(GenomicRanges))
 b <- novelty_check(UKB_PPP,METAL)
 replication <- filter(b,r2>=0.8)
 # Mutual uses of pQTLtools/SCALLOP-INF
 write.table(replication,file="~/INF/work/UKB-PPP.txt",row.names=FALSE,quote=FALSE,sep="\t")
 load(file.path(find.package("pQTLtools"),"tests","novel_data.rda"))
 prot_rsid <- with(novel_data,paste0(prot,"-",rsid))
 prot_rsid_repl <- with(replication,paste0(query.prot,"-",query.rsid))
 left <- setdiff(prot_rsid,prot_rsid_repl)
 # local LD reference panel,
 # https://raw.githubusercontent.com/jinghuazhao/INF/master/rsid/UKB-PPP.sh:
 variant_list <- b[c("UKB.seqnames","UKB.rsid","SCALLOP.rsid")]
 for (i in 1:nrow(variant_list))
 {
     z <- variant_list[i,]
     r <- ieugwasr::ld_matrix_local(z[c("known.rsid","query.rsid")],with_alleles=TRUE,
                                    bfile=file.path(INF,"INTERVAL","per_chr",paste0("interval.imputed.olink.chr_",z$UKB.seqnames)),
                                    plink_bin="/rds/user/jhz22/hpc-work/bin/plink")
     r <- ifelse(nrow(r)==2,r[1,2],r)
     variant_list[i,"r"] <- round(r,3)
 }
 # LDlink:
 # token <- Sys.getenv("LDLINK_TOKEN")
 # r2 <- LDlinkR::LDmatrix(variant_list,pop="CEU",token=token)

## End(Not run)

jinghuazhao/pQTLtools documentation built on May 3, 2024, 2:05 p.m.