inst/doc/V5_slam_seq.R

## ----echo=F-------------------------------------------------------------------
# To examine objects:
# devtools::load_all(export_all=F) ; qwraps2::lazyload_cache_dir("vignettes/V5_slam_seq_cache/html")

knitr::opts_chunk$set(cache=TRUE, autodep=TRUE)

## ----setup, cache=FALSE, warning=FALSE, message=FALSE-------------------------
library(tidyverse)
library(ComplexHeatmap)
library(weitrix)

# BiocParallel supports multiple backends. 
# If the default hangs or errors, try others.
# The most reliable backed is to use serial processing
BiocParallel::register( BiocParallel::SerialParam() )

## ----load, message=FALSE------------------------------------------------------
coverage <- system.file("GSE99970", "GSE99970_T_coverage.csv.gz", package="weitrix") %>%
    read_csv() %>%
    column_to_rownames("gene") %>%
    as.matrix()

conversions <- system.file("GSE99970", "GSE99970_T_C_conversions.csv.gz", package="weitrix") %>%
    read_csv() %>%
    column_to_rownames("gene") %>%
    as.matrix()

# Calculate proportions, create weitrix
wei <- as_weitrix( conversions/coverage, coverage )
dim(wei)

# We will only use genes where at least 30 conversions were observed
good <- rowSums(conversions) >= 30
wei <- wei[good,]

# Add some column data from the names
parts <- str_match(colnames(wei), "(.*)_(Rep_.*)")
colData(wei)$group <- fct_inorder(parts[,2])
colData(wei)$rep <- fct_inorder(paste0("Rep_",parts[,3]))
rowData(wei)$mean_coverage <- rowMeans(weitrix_weights(wei))

wei

colMeans(weitrix_x(wei), na.rm=TRUE)

## ----calibrate----------------------------------------------------------------
# Compute an initial fit to provide residuals
fit <- weitrix_components(wei, design=~group)

cal <- weitrix_calibrate_all(wei, 
    design = fit,
    trend_formula = 
        ~ log(weight) + offset(log(mu*(1-mu))), 
    mu_min=0.001, mu_max=0.999)

metadata(cal)$weitrix$all_coef

## ----calplot_mu, fig.height=8-------------------------------------------------
weitrix_calplot(wei, fit, cat=group, covar=mu, guides=FALSE) + 
    coord_cartesian(xlim=c(0,0.1)) + labs(title="Before calibration")
weitrix_calplot(cal, fit, cat=group, covar=mu) + 
    coord_cartesian(xlim=c(0,0.1)) + labs(title="After calibration")

## ----calplot_weight, fig.height=8---------------------------------------------
weitrix_calplot(wei, fit, cat=group, covar=log(weitrix_weights(wei)), guides=FALSE) + 
    labs(title="Before calibration")
weitrix_calplot(cal, fit, cat=group, covar=log(weitrix_weights(wei))) + 
    labs(title="After calibration")

## ----comp, message=FALSE------------------------------------------------------
comp <- weitrix_components(cal, 2, n_restarts=1)

## ----showcomp-----------------------------------------------------------------
matrix_long(comp$col[,-1], row_info=colData(cal)) %>% 
    ggplot(aes(x=group,y=value)) + 
    geom_jitter(width=0.2,height=0) + 
    facet_grid(col~.)

## ----comp1--------------------------------------------------------------------
fast <- weitrix_confects(cal, comp$col, "C1")
fast

Heatmap(
    weitrix_x(cal)[ head(fast$table$name, 10) ,], 
    name="Proportion converted", 
    cluster_columns=FALSE, cluster_rows=FALSE)

## ----comp2--------------------------------------------------------------------
slow <- weitrix_confects(cal, comp$col, "C2")
slow

Heatmap(
    weitrix_x(cal)[ head(slow$table$name, 10) ,], 
    name="Proportion converted", 
    cluster_columns=FALSE, cluster_rows=FALSE)

## ----eval=FALSE---------------------------------------------------------------
#  library(tidyverse)
#  
#  download.file("https://www.ncbi.nlm.nih.gov/geo/download/?acc=GSE99970&format=file", "GSE99970_RAW.tar")
#  untar("GSE99970_RAW.tar", exdir="GSE99970_RAW")
#  
#  filenames <- list.files("GSE99970_RAW", full.names=TRUE)
#  samples <- str_match(filenames,"mESC_(.*)\\.tsv\\.gz")[,2]
#  dfs <- map(filenames, read_tsv, comment="#")
#  coverage <- do.call(cbind, map(dfs, "CoverageOnTs")) %>%
#      rowsum(dfs[[1]]$Name)
#  conversions <- do.call(cbind, map(dfs, "ConversionsOnTs")) %>%
#      rowsum(dfs[[1]]$Name)
#  colnames(conversions) <- colnames(coverage) <- samples
#  
#  reorder <- c(1:3, 25:27, 4:24)
#  
#  coverage[,reorder] %>%
#      as.data.frame() %>%
#      rownames_to_column("gene") %>%
#      write_csv("GSE99970_T_coverage.csv.gz")
#  
#  conversions[,reorder] %>%
#      as.data.frame() %>%
#      rownames_to_column("gene") %>%
#      write_csv("GSE99970_T_C_conversions.csv.gz")

Try the weitrix package in your browser

Any scripts or data that you put into this service are public.

weitrix documentation built on Nov. 8, 2020, 8:10 p.m.