# Install devtools, which provides install_github(), and load it.
install.packages("devtools")
library(devtools)

# Install the MRCIEUGTEx package from GitHub, then load it.
# `TRUE` is spelled out because `T` is an ordinary variable that can be
# reassigned.
install_github("mbyvcm/MRCIEUGTEx", quiet = TRUE)
library(MRCIEUGTEx)
The paths to the GTEx data can be set manually if you know where the GTEx files are located on RDSF:
# Path to the GTEx VCF file (the variable name suggests a directory --
# confirm which one the package expects).
gtex_vcf_dir <- ""

# Paths to the expression and covariate tar directories downloaded from the
# GTEx Portal.
covariate_matrix_tar <- ""
expression_matrix_tar <- ""
Alternatively, the GTEx directory on RDSF should contain a config.R file, which can be sourced to define these path variables:
# Source the shared config file so that the variables it defines become
# available -- presumably gtex_vcf_dir, covariate_matrix_tar and
# expression_matrix_tar; confirm against config.R.
source(file = "config.R")
"covariate_matrix_tar" and "expression_matrix_tar" are matricies that contain normalised, QC'd data and are downloadable from the GTEx Portal here:
http://www.gtexportal.org/static/datasets/gtex_analysis_v6/single_tissue_eqtl_data/GTEx_Analysis_V6_eQTLInputFiles_geneLevelNormalizedExpressionMatrices.tar.gz
http://www.gtexportal.org/static/datasets/gtex_analysis_v6/single_tissue_eqtl_data/GTEx_Analysis_V6_eQTLInputFiles_covariates.tar.gz
In order to generate a polygenic risk score (PRS), a "query data.frame" containing information for the sentinel SNPs is required:
SNP|effect.allele|other.allele|beta
---|---|---|---
rs123|A|C|0.136
rs456|G|A|-0.324
The [TwoSampleMR](https://mrcieu.github.io/TwoSampleMR/) R package can be used to generate a similar data frame for a given outcome and p-value threshold.
# Extract the query SNPs from the GTEx VCF files.
gtex_query <- extract_query_snps_gtex(
  query_data_frame = query_data_frame,
  gtex_vcf_dir = gtex_vcf_dir
)

# Calculate the PRS. The same query data frame used for the extraction is
# passed here; the previous `df` was never defined in this walkthrough and
# would otherwise resolve to base R's F-distribution density function.
geno <- calculate_prs_gtex(
  query_data_frame = query_data_frame,
  gtex_data_frame = gtex_query
)
If several PRS have been generated for different p-value thresholds, these can be combined in a list. This will be quicker than running each PRS separately (because that would require the expression matrices to be reloaded each time):
# Bundle the individual PRS results into a single named list so they can be
# passed on together. `geno2` stands for a second PRS computed the same way
# as `geno`.
genoList <- list(PRS1 = geno, PRS2 = geno2)
Make a character vector of the tissues you want to analyse. The available_tissues() function may help, provided the 'expression_matrix_tar' path is set.
# List the tissues reported for the expression matrix path, keep only those
# whose name matches "Brain", then run the eQTL analysis on that subset.
tissue <- available_tissues(expression_matrix_tar)
tissue <- tissue[grep(tissue, pattern = "Brain")]
output <- run_eqtl(geno = genoList, tissue = tissue)
Extract the 'top hits':
# Pull the top hits out of the eQTL output at a threshold of 0.05.
# `TRUE` is spelled out because `T` is an ordinary variable that can be
# reassigned.
tophits <- extract_top_hits(x = output, fdr = TRUE, pthresh = 0.05)
Render volcano plots with gene symbols added to significant genes:
# Build the volcano plots from the eQTL output at a threshold of 0.05.
# `TRUE` is spelled out because `T` is an ordinary variable that can be
# reassigned.
vp <- volcanoplot(output, fdr = TRUE, pthresh = 0.05)
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.