knitr::opts_chunk$set(echo = TRUE)

Set up files for qtl2shiny package. Do this once, or whenever data changes. This assumes that DerivedData have been placed in an accessible folder. These data, in turn, come from various sources.

dirpath <- "~/Documents/Research/attie_alan/DO/data"
datapath <- file.path(dirpath, "DerivedData")
covar        <- DOread::setup_covar(datapath)
peaks        <- DOread::setup_peaks(datapath) 
analyses_tbl <- DOread::setup_analyses(peaks, datapath)
pheno_data   <- DOread::setup_data(analyses_tbl, peaks, datapath)
pheno_type   <- DOread::setup_type(analyses_tbl)

Rename groups

group_rename <- function(x) {
  x[x == "BileAcid"] <- "Molecule"
  x[x %in% c("otu","otufam")] <- "OldOTU"
  x[x %in% c("gutMB","OTU_Module")] <- "OTU_Closed_Ref"
  x[x == "clin"] <- "Clinical"
  x
}
peaks <- 
  dplyr::mutate(peaks, 
                pheno_group = group_rename(pheno_group))
analyses_tbl <- 
  dplyr::mutate(analyses_tbl, 
                pheno_group = group_rename(pheno_group))

Reduce to peaks and analyses that match phenotypes

Remove unknowns that have no pheno data.

nopeaks <- dplyr::filter(peaks,
                         !(pheno %in% names(pheno_data)))
nopheno <- dplyr::distinct(nopeaks, pheno)$pheno
table(stringr::str_sub(nopheno,1,2))
table(stringr::str_sub(nopheno[grep("Liver|Plasma", nopheno)],1,10))

Note that following step is problematic if any peaks have no pheno data. See examples above.

Covariates used

dplyr::select(
  dplyr::filter(
    tidyr::spread(
      dplyr::mutate(
        dplyr::bind_rows(
          purrr::map(
            as.list(
              dplyr::select(analyses_tbl, -(pheno:winsorize))),
            function(x, gp) {
                out <- dplyr::mutate(
                  data.frame(table(gp, x)),
                  gp = as.character(gp),
                  x = as.character(x))
            },
            analyses_tbl$pheno_group),
          .id = "covar"),
        covar = factor(covar, unique(covar))),
      gp, Freq),
    x == "TRUE"),
  -x)

Phenotype groups

dplyr::count(analyses_tbl, pheno_group)

Filter peaks and analyses

peaks <- dplyr::filter(peaks,
                       pheno %in% names(pheno_data))
analyses_tbl <- dplyr::filter(analyses_tbl,
                       pheno %in% names(pheno_data))

Hotspots

pmap <- readRDS(file.path(datapath, "pmap.rds"))
hots <- qtl2pattern::hotspot(pmap, peaks)

Write out results

if(!dir.exists(filtered <- file.path("~/Documents/Research/qtl2shiny", "qtl2shinyData", "CCmouse", "AttieDO")))
  dir.create(filtered)
saveRDS(covar, file.path(filtered, "covar.rds"))
saveRDS(peaks, file.path(filtered, "peaks.rds"))
saveRDS(analyses_tbl, file.path(filtered, "analyses.rds"))
saveRDS(pheno_data, file.path(filtered, "pheno_data.rds"))
saveRDS(pheno_type, file.path(filtered, "pheno_type.rds"))
saveRDS(hots, file.path(filtered, "hotspot.rds"))


byandell/DOread documentation built on May 13, 2019, 9:26 a.m.