knitr::opts_chunk$set(echo = TRUE)
library(dplyr) library(ggplot2)
project_info <- data.frame(project = "Recla", taxa = "CCmouse", directory = "../../..", stringsAsFactors = FALSE)
(taxa_dir <- file.path(project_info$directory, project_info$taxa))
(project_dir <- file.path(taxa_dir, project_info$project))
Assume local directory already created with taxa
and project
under,
and with all data already installed.
if(!dir.exists(project_dir)) { warning("project director does not exist") knitr::knit_exit() }
# See shinyHotspot.R hotspots <- readRDS(file.path(project_dir, "hotspot.rds"))
# See shinyHotspot.R (hot_sum <- qtl2pattern::summary_scan1(hotspots$scan, hotspots$map) %>% filter(pheno == "all") %>% rename(count = lod) %>% select(-marker) %>% arrange(desc(count)))
chr_id <- as.character(hot_sum$chr[1]) pos_Mbp <- hot_sum$pos[1] window_Mbp <- 5 start_val <- pos_Mbp - window_Mbp end_val <- pos_Mbp + window_Mbp
# See shinyMain.R peaks <- readRDS(file.path(project_dir, "peaks.rds")) %>% filter(chr == chr_id, pos >= start_val, pos <= end_val)
# See shinyPhenos.R (peaks <- peaks %>% select(pheno, chr, pos, lod) %>% arrange(desc(lod))) %>% head(1)
pheno_name <- peaks$pheno[1]
# See shinyMain.R analyses <- readRDS(file.path(project_dir, "analyses.rds")) %>% filter(pheno == pheno_name)
# See shinyMain.R pheno_data <- readRDS(file.path(project_dir, "pheno_data.rds")) %>% qtl2pattern::pheno_trans(analyses$pheno, analyses$transf, analyses$offset, analyses$winsorize)
# See shinyMain.R covar <- readRDS(file.path(project_dir, "covar.rds")) %>% qtl2pattern::get_covar(analyses)
# See shinyMain.R kinship <- readRDS(file.path(project_dir, "kinship.rds"))[chr_id]
# See shinyProbs.R and Recla.Rmd query_probs <- qtl2pattern::create_probs_query_func(project_dir) probs_obj <- query_probs(chr_id, start_val, end_val)
# See shinySNPSetup.R and shinyProbs.R query_variants <- qtl2::create_variant_query_func( file.path(taxa_dir, "cc_variants.sqlite")) snpinfo <- query_variants(chr_id, start_val, end_val)
snpprobs_obj <- qtl2pattern::get_snpprobs(chr_id, pos_Mbp, window_Mbp, pheno_name, probs_obj$probs, probs_obj$map, snpinfo)
snp_scan_obj <- qtl2pattern::scan1_covar(pheno_data, covar, snpprobs_obj$snpprobs, kinship, analyses)
# See shinySNPSum and shinySNPSetup top_snps_tbl <- qtl2pattern::top_snps_all(snp_scan_obj, snpprobs_obj$snpinfo, 1.5) (patterns <- summary(top_snps_tbl)) %>% head(1)
This is ugly. It takes another package (CausalMST
) and is under redesign.
So I am documenting the steps.
First, identify all phenotypes that map to the region. This is done using peaks
in the region to identify phenotype names and all_analyses
to construct the appropriate phenotype transformation.
# See shinyMediate.R all_analyses <- readRDS(file.path(project_dir, "analyses.rds")) %>% filter(pheno != pheno_name, pheno %in% peaks$pheno) all_pheno_data <- readRDS(file.path(project_dir, "pheno_data.rds")) %>% qtl2pattern::pheno_trans(all_analyses$pheno, all_analyses$transf, all_analyses$offset, all_analyses$winsorize)
The following works for mediators as other phenotypes. But see qtl2pattern::expr_region()
and qtl2pattern::create_mrna_query_func()
to create query_mrna()
to access expression data.
The basic idea here is to construct a list containing at least three items:
pheno
: matrix of phenotypes (columns) by individuals (rows)annot
: data frame of annotationcovar
: data frame of covariatespeaks
: data frame of peak information (optional)There does not seem to be any way at present to allow different covariates for different phenotypes.
The annot
data frame has components that are used by CausalMST
routines. These are subject to change, but currently include:
chr
pos
id
: typically column names of pheno
object in med_ls
longname
(or symbol
for expression data)FALSE
(TRUE
for traits that reside and map to region)biotype
: refers to expression biotype (pheno_type
for other phenos)qtl_ct
: count of number of QTLsQTL
: compressed info on QTLsdriver
: names of driver for mediator (optional)med_ls <- qtl2pattern::pheno_region( chr_id, start_val, end_val, covar, probs_obj$map, peaks, all_analyses, all_pheno_data)
peak_mar <- qtl2::find_marker(probs_obj$map, chr_id, pos_Mbp) # waiting for fix to qtl2 #geno_max <- qtl2::pull_genoprobpos(probs_obj$probs, peak_mar) geno_max <- subset(probs_obj$probs, mar = peak_mar)[[1]][,,1] driver_med <- probs_obj$probs[[chr_id]][,, unique(med_ls[[2]]$driver), drop = FALSE]
The CausalMST::mediate1_test()
routine uses the first two components of the med_ls
list. The pheno
matrix columns are set of mediators for the target.
The annot$driver
is used in one-to-one fashion with columns of pheno
as the names of the driver to be pulled from the argument driver_med
. If either annot$driver
or driver_med
is NULL
, then the common driver
(second argument, here being geno_max
) is used for both target
and mediator(s).
All of the elements of annot
are passed allong to the best
element of the mediate1_test
list returned by CausalMST::mediate1_test()
. The mediate1_test
also has elements params
and targetFit
. The params
element includes the pos
and lod
for the target
, which are used for plotting. There is currently no summary
for a mediate1_test
object.
mediate_obj <- CausalMST::mediation_test( target = pheno_data, mediator = med_ls[[1]], annotation = med_ls[[2]], driver = geno_max, covar_tar = covar, covar_med = med_ls$covar, kinship = kinship[[1]], driver_med = driver_med, test = "wilc", pos = pos_Mbp)
(sum_med <- summary(mediate_obj))
The plot uses additional information from the annot
, particularly when using plotly
. These include symbol
, pos
, QTL
and biotype
. These are rather arcane at the moment. To be improved.
autoplot(mediate_obj)
plotly::ggplotly(autoplot(mediate_obj))
The CausalMST::med_scatter()
routine uses additional parts of the med_ls
list.
med_name <- sum_med$id[1] mediator <- med_ls[[1]][, med_name, drop = FALSE] driver_name <- (med_ls[[2]] %>% filter(id == med_name))$driver driver <- driver_med[,, driver_name]
sdp <- snpprobs_obj$snpinfo$sdp[which.min(abs(snpprobs_obj$snpinfo$pos - sum_med$pos[1]))]
scat_dat <- CausalMST::mediation_triad( target = pheno_data, mediator = mediator, driver = driver, covar_tar = covar, covar_med = med_ls$covar, kinship = kinship[[1]], sdp = sdp, allele = TRUE)
autoplot(scat_dat, tname = pheno_name, mname = med_name, dname = driver_name)
autoplot(scat_dat, type = "by_target")
autoplot(scat_dat, type = "driver_offset", centerline = NA)
autoplot(scat_dat, type = "driver", centerline = NA)
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.