knitr::opts_chunk$set(echo = TRUE)
library(dplyr)
library(ggplot2)
project_info <- data.frame(project = "Recla",
                           taxa = "CCmouse",
                           directory = "../../..",
                           stringsAsFactors = FALSE)
(taxa_dir <- file.path(project_info$directory, 
                          project_info$taxa))
(project_dir <- file.path(taxa_dir, 
                          project_info$project))

Assume local directory already created with taxa and project under, and with all data already installed.

if(!dir.exists(project_dir)) {
  warning("project director does not exist")
  knitr::knit_exit()
}

Setup

Region and Phenotype

# See shinyHotspot.R
hotspots <- readRDS(file.path(project_dir, "hotspot.rds"))
# See shinyHotspot.R
(hot_sum <- qtl2pattern::summary_scan1(hotspots$scan, hotspots$map) %>%
  filter(pheno == "all") %>%
  rename(count = lod) %>%
  select(-marker) %>%
  arrange(desc(count)))
chr_id <- as.character(hot_sum$chr[1])
pos_Mbp <- hot_sum$pos[1]
window_Mbp <- 5
start_val <- pos_Mbp - window_Mbp
end_val <- pos_Mbp + window_Mbp
# See shinyMain.R
peaks <- readRDS(file.path(project_dir, "peaks.rds")) %>%
  filter(chr == chr_id,
         pos >= start_val,
         pos <= end_val)
# See shinyPhenos.R
(peaks <- peaks %>%
  select(pheno, chr, pos, lod) %>%
  arrange(desc(lod))) %>%
  head(1)
pheno_name <- peaks$pheno[1]
# See shinyMain.R
analyses <- readRDS(file.path(project_dir, "analyses.rds")) %>%
  filter(pheno == pheno_name)
# See shinyMain.R
pheno_data <- readRDS(file.path(project_dir, "pheno_data.rds")) %>%
   qtl2pattern::pheno_trans(analyses$pheno, 
               analyses$transf,
               analyses$offset,
               analyses$winsorize)
# See shinyMain.R
covar <- readRDS(file.path(project_dir, "covar.rds")) %>%
  qtl2pattern::get_covar(analyses)

Genotype probabilities

# See shinyMain.R
kinship <- readRDS(file.path(project_dir, "kinship.rds"))[chr_id]
# See shinyProbs.R and Recla.Rmd
query_probs <- qtl2pattern::create_probs_query_func(project_dir)
probs_obj <- query_probs(chr_id, start_val, end_val)

SNP Scans

# See shinySNPSetup.R and shinyProbs.R
query_variants <- qtl2::create_variant_query_func(
  file.path(taxa_dir, "cc_variants.sqlite"))
snpinfo <- query_variants(chr_id, start_val, end_val)
snpprobs_obj <- qtl2pattern::get_snpprobs(chr_id, pos_Mbp, window_Mbp,
                   pheno_name, 
                   probs_obj$probs,
                   probs_obj$map,
                   snpinfo)
snp_scan_obj <- qtl2pattern::scan1_covar(pheno_data, covar, snpprobs_obj$snpprobs, kinship, analyses)
# See shinySNPSum and shinySNPSetup
top_snps_tbl <- qtl2pattern::top_snps_all(snp_scan_obj,
                        snpprobs_obj$snpinfo,
                        1.5)
(patterns <- summary(top_snps_tbl)) %>% head(1)

Mediation

This is ugly. It takes another package (CausalMST) and is under redesign. So I am documenting the steps.

First, identify all phenotypes that map to the region. This is done using peaks in the region to identify phenotype names and all_analyses to construct the appropriate phenotype transformation.

# See shinyMediate.R
all_analyses <- readRDS(file.path(project_dir, "analyses.rds")) %>%
  filter(pheno != pheno_name,
         pheno %in% peaks$pheno)
all_pheno_data <- readRDS(file.path(project_dir, "pheno_data.rds")) %>%
   qtl2pattern::pheno_trans(all_analyses$pheno, 
               all_analyses$transf,
               all_analyses$offset,
               all_analyses$winsorize)

The following works for mediators as other phenotypes. But see qtl2pattern::expr_region() and qtl2pattern::create_mrna_query_func() to create query_mrna() to access expression data.

The basic idea here is to construct a list containing at least three items:

There does not seem to be any way at present to allow different covariates for different phenotypes.

The annot data frame has components that are used by CausalMST routines. These are subject to change, but currently include:

med_ls <- qtl2pattern::pheno_region(
  chr_id, start_val, end_val, covar, probs_obj$map,
  peaks, all_analyses, all_pheno_data)
peak_mar <- qtl2::find_marker(probs_obj$map, chr_id, pos_Mbp)
# waiting for fix to qtl2
#geno_max <- qtl2::pull_genoprobpos(probs_obj$probs, peak_mar)
geno_max <- subset(probs_obj$probs, mar = peak_mar)[[1]][,,1]
driver_med <- probs_obj$probs[[chr_id]][,, unique(med_ls[[2]]$driver), drop = FALSE]

The CausalMST::mediate1_test() routine uses the first two components of the med_ls list. The pheno matrix columns are set of mediators for the target. The annot$driver is used in one-to-one fashion with columns of pheno as the names of the driver to be pulled from the argument driver_med. If either annot$driver or driver_med is NULL, then the common driver (second argument, here being geno_max) is used for both target and mediator(s).

All of the elements of annot are passed allong to the best element of the mediate1_test list returned by CausalMST::mediate1_test(). The mediate1_test also has elements params and targetFit. The params element includes the pos and lod for the target, which are used for plotting. There is currently no summary for a mediate1_test object.

mediate_obj <- CausalMST::mediation_test(
  target = pheno_data,
  mediator = med_ls[[1]], 
  annotation = med_ls[[2]],
  driver = geno_max, 
  covar_tar = covar,
  covar_med = med_ls$covar,
  kinship = kinship[[1]],
  driver_med = driver_med,
  test = "wilc",
  pos = pos_Mbp)
(sum_med <- summary(mediate_obj))

The plot uses additional information from the annot, particularly when using plotly. These include symbol, pos, QTL and biotype. These are rather arcane at the moment. To be improved.

autoplot(mediate_obj)
plotly::ggplotly(autoplot(mediate_obj))

The CausalMST::med_scatter() routine uses additional parts of the med_ls list.

med_name <- sum_med$id[1]
mediator <- med_ls[[1]][, med_name, drop = FALSE]
driver_name <- (med_ls[[2]] %>% 
  filter(id == med_name))$driver
driver <- driver_med[,, driver_name]
sdp <- snpprobs_obj$snpinfo$sdp[which.min(abs(snpprobs_obj$snpinfo$pos - sum_med$pos[1]))]
scat_dat <- CausalMST::mediation_triad(
  target = pheno_data, 
  mediator = mediator,
  driver = driver, 
  covar_tar = covar, 
  covar_med = med_ls$covar,
  kinship = kinship[[1]], 
  sdp = sdp, 
  allele = TRUE)
autoplot(scat_dat, tname = pheno_name, mname = med_name, dname = driver_name)
autoplot(scat_dat, type = "by_target")
autoplot(scat_dat, type = "driver_offset", centerline = NA)
autoplot(scat_dat, type = "driver", centerline = NA)


byandell/CausalMST documentation built on May 13, 2019, 9:26 a.m.