knitr::opts_chunk$set(echo = TRUE)

Setup

suppressPackageStartupMessages({
  library(qtl2geno)
  library(qtl2scan)
  library(qtl2ggplot)
})
pheno_names <- stringr::str_split(params$pheno_names, ",")[[1]]
datapath <- as.character(params$datapath)

Phenotypes and Covariates

Set up data files. This has dataset specific information. Note filtering to smaller subsets below.

R object | description ---------------|------------ analyses_tbl | table of analysis settings from Karl peaks | table of previously computed LOD peaks from Karl pheno_data | table of phenotype data pheno_type | vector of phenotype data types covar | matrix of covariates

covar <- readRDS(file.path(datapath, "covar.rds"))

Filter peaks and analyses_tbl to "best" analysis (may be called anal1 or anal2).

peaks <- readRDS(file.path(datapath, "peaks.rds"))
peak_info <- (
  dplyr::ungroup(
    dplyr::summarize(
      dplyr::arrange(
        dplyr::distinct(
          dplyr::group_by(peaks, pheno), 
          output), 
        dplyr::desc(output)), 
      output = output[1])
    )
  )$output
peaks <- dplyr::filter(peaks, output %in% peak_info)
analyses_tbl <- dplyr::filter(readRDS(file.path(datapath, "analyses.rds")), 
                              output %in% peak_info)
peak_info <- peaks$pheno
pheno_data <- DOread::read_pheno_tbl(analyses_tbl, datapath)
pheno_data <- dplyr::select(pheno_data, 
                            which(names(pheno_data) %in% peak_info))
rm(peak_info)
pheno_type <- c("all", sort(unique(analyses_tbl$pheno_type)))

Genotype probabilities and genome features

In addition, the following objects are read during workflow. They are in the DerivedData folder. See specific functions for how they are read.

R object | description -------------|------------ K_chr | list of kinship matrices probs.rds | calc_genoprob object for whole genome

Read kinship matrix. Can be single chromosome or list for whole genome.

K_chr <- readRDS(file.path(datapath, "kinship.rds"))

Read genotype probability object across 8 CC founder alleles for chr r chr_id. Can be single chromosome or whole genome. DOread::read_probs reads probs for one or more chromosomes, or all chromosomes. See qtl2geno::calc_genoprob.

probs_obj <- DOread::read_probs(, datapath)

Specific phenotypes for this workflow

Filter analyses table and get phenotypes and covariate objects. Do appropriate transformations on phenotypes as needed.

analyses_df <- dplyr::filter(analyses_tbl, pheno %in% pheno_names)
phe_df <- DOread::get_pheno(pheno_data,
                            dplyr::distinct(analyses_df, 
                                            pheno, .keep_all=TRUE))
cov_mx <- DOread::get_covar(covar, analyses_df)

Scan traits into SQLite

Genome scan1 (qtl2scan::scan1).

if(file.exists(tmp <- "scan_obj.rds")) {
  scan_obj <- readRDS(tmp)
} else {
  scan_obj <- qtl2scan::scan1(probs_obj, phe_df, K_chr, cov_mx)
  saveRDS(scan_obj, tmp)
}
object.size(scan_obj)
sapply(scan_obj, object.size)
sapply(scan_obj, class)

Save as long table

save_scan_qtl <- function(scan_obj, sql_file = "scan.sqlite",
                     create = !file.exists(sql_file)) {
  if(create & file.exists(sql_file))
    file.remove(sql_file)
  my_db <- dplyr::src_sqlite(sql_file, create = create)
  scans <- dplyr::bind_rows(
    lapply(scan_obj$map, function(x) {
      data.frame(mar = names(x),
                 pos = x)
    }),
    .id = "chr")
  scans <- tidyr::gather(
    cbind(scans, scan_obj$lod),
    pheno, lod, -chr, -mar, -pos)
  dplyr::copy_to(my_db,
                   scans,
                   name = "scans",
                   temporary = FALSE,
                   indexes = list(c("chr","pos"), "pheno"))
  dplyr::src_tbls(my_db)
}
read_scan_qtl <- function(chr_id=NULL, start_Mbp=0, end_Mbp=1000, pheno_names=NULL,
                          sql_file = "scan.sqlite") {
  my_db <- dplyr::src_sqlite(sql_file)
  scans <- dplyr::tbl(my_db, "scans")
  if(!is.null(chr_id)) {
    scans <- dplyr::filter(scans, chr == chr_id[1])
  }
  scans <- dplyr::filter(scans, 
    pos >= start_Mbp,
    pos <= end_Mbp)
  if(!is.null(pheno_names))
    scans <- dplyr::filter(pheno %in% pheno_names)
  dplyr::collect(scans, n = Inf)
}
save_scan_qtl(scan_obj, create=TRUE)
out <- read_scan_qtl("3", 50, 70)
out <- read_scan_qtl()

Save as is more or less

create_scan_qtl <- function(scan_obj, sql_file = "scan_is.sqlite",
                     create = !file.exists(sql_file)) {
  if(create & file.exists(sql_file))
    file.remove(sql_file)
  my_db <- dplyr::src_sqlite(sql_file, create = create)
  lod <- as.data.frame(scan_obj$lod)
  lod$mar <- rownames(lod)
  lod <- tidyr::gather(lod, pheno, lod, -mar)
  dplyr::copy_to(my_db, lod,
                 name = "lod",
                 temporary = FALSE)
  not_lod <- match("lod", names(scan_obj))
  # out points to SQL file and has other elements of scan_obj.
  out <- list(sql_file = sql_file)
  for(i in names(scan_obj)[-not_lod]) {
    out[[i]] <- scan_obj[[i]]
  }
  class(out) <- c("scan_qtl", class(out))
  out
}
subset.scan_qtl <- function(x, chr_id=NULL, start_Mbp=0, end_Mbp=1000, pheno_name=NULL,
                            ...) {
  my_db <- dplyr::src_sqlite(x$sql_file)
  # rearrange map into long table
  map <- dplyr::bind_rows(
    lapply(x$map, function(x) {
      data.frame(mar = names(x),
                 pos = x)
      }),
    .id = "chr")
  map <- dplyr::filter(map,
                       chr == chr_id,
                       pos >= start_Mbp,
                       pos <= end_Mbp)
  x$map <- x$map[chr_id]
  for(i in names(x$map)) {
    tmp <- x$map[[i]]
    tmp <- tmp[tmp >= start_Mbp & tmp <= end_Mbp]
    x$map[[i]] <- tmp
  }

  # extract lod from SQL for pheno_name or all traits.
  lod <- dplyr::tbl(my_db, "lod")
  if(!is.null(pheno_name))
    lod <- dplyr::filter(lod, 
                         pheno == pheno_name[1])
  # spread LODs back out over phenotypes
  lod <- tidyr::spread(
    dplyr::collect(lod,
                   n = Inf),
    pheno, lod)
  # reorder and subset by markers from map
  lod <- as.data.frame(lod[match(map$mar, lod$mar),, drop=FALSE])
  # put markers ans row names, drop from table, and make as matrix
  rownames(lod) <- lod$mar
  lod$mar <- NULL
  x$lod <- as.matrix(lod)

  # remake x as scan1 object
  x$sql_file <- NULL
  class(x)[[1]] <- "scan1"
  x
}
out_scan <- create_scan_qtl(scan_obj, create=TRUE)
out_is <- subset.scan_qtl(out_scan, "3", pheno_names)
out <- read_scan_qtl()


byandell/qtl2pattern documentation built on Nov. 9, 2023, 7:57 p.m.