knitr::opts_chunk$set(echo = TRUE)
The popular R/qtl package for gene mapping has been redeveloped for high volume, multi-parent systems genetics data as R/qtl2. This new package provides a Shiny interface to extensions of R/qtl2.
http://pages.stat.wisc.edu/~yandell/software/qtl2shiny/ contains further information on the 'qtl2shiny' server in action, including R/qtl2shiny Screen Shots.
For more details on use of the package, see the R/qtl2shiny package, R/qtl2shiny User Guide and R/qtl2shiny Developer Guide. This document is created by inst/scripts/qtl2shiny_index.Rmd.
See also
This document sets up the Recla
project using data from https://github.com/rqtl/qtl2data for use with R/qtl2shiny.
The project_info
data frame contains control information about each project (see R/qtl2shiny).
This process takes some time, particularly for calculating genotype probabilities with 'qtl2::calc_genoprob' and scanning the phenotypes with 'qtl2::scan1'; see https://kbroman.org/qtl2/.
# For use in interactive mode for testing. if(!exists("params") & interactive()) { params <- list( root_directory = "../../qtl2shinyDemo", shiny_directory = "qtl2shinyData", project = "Recla", taxa = "CCmouse", dontrun = TRUE) }
(project_info <- data.frame(project = params$project, taxa = params$taxa, directory = params$shiny_directory))
Assume local directory already created with taxa
and project
under.
(localdir <- params$root_directory)
The processed data will reside in the following directory:
taxa_path <- file.path(project_info$directory, project_info$taxa) taxa_dir <- file.path(localdir, taxa_path) project_path <- file.path(taxa_path, project_info$project) (project_dir <- file.path(localdir, project_path))
The data is pulled from this internet directory:
(project_data <- file.path( "https://raw.githubusercontent.com/rqtl", "qtl2data/master/DO_Recla/recla.zip"))
If dontrun
is r TRUE
then just show remaining code without running.
if(params$dontrun) { warning("Showing rest of code without running") knitr::opts_chunk$set(eval = FALSE, echo = TRUE) }
Stop if local directory does not exist.
if(!dir.exists(localdir)) { warning(paste("local directory", localdir, "does not exist and must be created")) knitr::knit_exit() }
Stop if deepest project_dir
already exists. Assumption is that data already exists
and do not want to overwrite.
if(dir.exists(project_dir)) { warning(paste("project directory", project_dir, "already exists and will not be overwritten")) knitr::knit_exit() }
Create directories if they do not exist.
if(!dir.exists(project_dir)) { if(!dir.exists(taxa_dir)) { if(!dir.exists(data_dir <- file.path(localdir, project_info$directory))) dir.create(data_dir) dir.create(taxa_dir) } dir.create(project_dir) }
These are files that sit in the localdir
for use by the shiny app.
for(filename in c("app.R", "about.md", "about-extended.md")) { if(file.exists(fname <- file.path(localdir, filename))) { warning(paste("qtl2shiny file", fname, "already exists and will not be overwritten")) } else { file.copy(system.file(file.path("qtl2shinyApp", filename), package = "qtl2shiny"), fname) } }
allele_info <- data.frame( code = LETTERS[1:8], shortname = names(qtl2::CCcolors), longname = c("A/J","C57BL/6J","129S1/SvImJ","NOD/LtJ","NZO/HlLtJ","CAST/EiJ","PWK/PhJ","WSB/EiJ"), allelename = c("A_J","C57BL_6J","129S1_SvImJ","NOD_LtJ","NZO_HlLtJ","CAST_EiJ","PWK_PhJ","WSB_EiJ"), link = c("https://www.jax.org/strain/000646", "https://www.jax.org/strain/000664", "https://www.jax.org/strain/002448", "https://www.jax.org/strain/001289", "https://www.jax.org/strain/002105", "https://www.jax.org/strain/000928", "https://www.jax.org/strain/003715", "https://www.jax.org/strain/001145"), color = qtl2::CCcolors, color_name = c("yellow","grey","salmon","blue","aqua","green","red","purple"), origcolor = qtl2::CCorigcolors, stringsAsFactors = FALSE )
saveRDS(allele_info, file.path(taxa_dir, "allele_info.rds"))
taxa_info <- "Mus musculus"
saveRDS(taxa_info, file.path(taxa_dir, "taxa_info.rds"))
The query for genes and variants are usually common across all projects for a taxa.
Here we use query routines from R/qtl2 for the CC mouse. Do not create if they already exist.
Note that the taxa_path
is a relative address.
if(file.exists(qname <- file.path(taxa_dir, "query_genes.rds"))) { warning(paste("gene query", qname, "already exists and will not be overwritten")) } else { query_genes <- qtl2::create_gene_query_func( file.path(taxa_path, "mouse_genes.sqlite")) saveRDS(query_genes, qname) }
if(file.exists(qname <- file.path(taxa_dir, "query_variants.rds"))) { warning(paste("variant query", qname, "already exists and will not be overwritten")) } else { query_variants <- qtl2::create_variant_query_func( file.path(taxa_path, "cc_variants.sqlite")) saveRDS(query_variants, qname) }
Use create_probs_query_func
from package 'qtl2pattern' to query genotype probabilities.
Save it in RDS format for later use. Note that it requires a relative address to the project data provided by project_path
.
query_probs <- qtl2pattern::create_probs_query_func(project_path) saveRDS(query_probs, file.path(project_dir, "query_probs.rds"))
These data do not include mRNA data, so set up null routine. Make sure it has required arguments.
query_mrna <- qtl2shiny::create_mrna_query_func(NULL) saveRDS(query_mrna, file.path(project_dir, "query_mrna.rds"))
if(httr::HEAD(project_data)$status != 200) { warning(paste("project data for", project_info$project, "does not exist")) knitr::knit_exit() }
qtl2data <- qtl2::read_cross2(project_data)
covar <- dplyr::rename(qtl2data$covar, sex = "Sex")
saveRDS(covar, file.path(project_dir, "covar.rds")) saveRDS(qtl2data$pheno, file.path(project_dir, "pheno_data.rds"))
gmap <- qtl2::insert_pseudomarkers(qtl2data$gmap, step=1) pmap <- qtl2::interp_map(gmap, qtl2data$gmap, qtl2data$pmap) saveRDS(pmap, file.path(project_dir, "pmap.rds"))
Genotype probabilities can be very large and are best put in a fast database (such as "fst" or "feather"). Current recommendation is "fst". This step takes time, primarily in calculating the genotype probabilities.
fast <- "fst"
if(!dir.exists(genoprob_dir <- file.path(project_dir, "genoprob"))) { dir.create(genoprob_dir) } faprobs_file <- file.path(genoprob_dir, paste0("aprobs_fstindex.rds")) if(!file.exists(fprobs_file <- file.path(genoprob_dir, paste0("probs_fstindex.rds")))) { cat("allele pair genotype probabilities ...\n", file = stderr()) pr <- qtl2::calc_genoprob(qtl2data, gmap, err=0.002) cat(paste0("allele pair prob conversion via qtl2", fast, "\n"), file = stderr()) fast_genoprob <- switch(fast, fst = qtl2fst::fst_genoprob, feather = qtl2feather::feather_genoprob) fprobs <- fast_genoprob(pr, "probs", genoprob_dir, quiet = TRUE) saveRDS(fprobs, file = fprobs_file) cat("allele genotype probabilities\n", file = stderr()) apr <- qtl2::genoprob_to_alleleprob(pr) cat(paste0("allele prob conversion via qtl2", fast, "\n"), file = stderr()) faprobs <- fast_genoprob(apr, "aprobs", genoprob_dir, quiet = TRUE) saveRDS(faprobs, file = faprobs_file) } else { fprobs <- readRDS(fprobs_file) faprobs <- readRDS(faprobs_file) }
kinship_loco <- qtl2::calc_kinship(faprobs, "loco")
saveRDS(kinship_loco, file.path(project_dir, "kinship.rds"))
form <- formula(paste("~", paste(names(covar), collapse = "+"))) addcovar <- stats::model.matrix(form, covar)[, -1, drop = FALSE] out <- qtl2::scan1(faprobs, qtl2data$pheno, kinship_loco, addcovar=addcovar)
peaks <- qtl2::find_peaks(out, pmap, threshold = 3)
peaks <- dplyr::select( dplyr::mutate( dplyr::rename( peaks, pheno = "lodcolumn"), longname = .data$pheno, output = .data$pheno, pheno_group = "group", # used to group phenotypes pheno_type = "type"), # used to type phenotypes -.data$lodindex)
analyses_tbl <- dplyr::mutate( dplyr::select( dplyr::distinct( peaks, .data$pheno, .keep_all = TRUE), -.data$lod, -.data$pos, -.data$chr), model = "normal", transf = "identity", offset = 0, winsorize = FALSE) for(i in names(covar)) analyses_tbl[[i]] <- TRUE
saveRDS(peaks, file.path(project_dir, "peaks.rds")) saveRDS(analyses_tbl, file.path(project_dir, "analyses.rds"))
localdir
list.files(localdir)
taxa_dir
list.files(taxa_dir)
project_dir
list.files(project_dir)
list.files(file.path(project_dir, "genoprob"))
file.size(file.path(project_dir, "genoprob", "probs_fstindex.rds"))
file.size(file.path(project_dir, "genoprob", "probs_1.fst"))
hots <- qtl2shiny::hotspot(pmap, peaks)
saveRDS(hots, file.path(project_dir, "hotspot.rds"))
# See shinyHotspot.R hotspots <- readRDS(file.path(project_dir, "hotspot.rds"))
# See shinyHotspot.R qtl2ggplot::ggplot_scan1(hotspots$scan, hotspots$map)
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.