library(metageneInputs)
library(ENCODExplorer)

Introduction

The goals of this vignette are:

All the files produced will be save inside a temporary directory and will need to be moved manually:

outdir <- paste0("/tmp/", basename(tempfile("")))
dir.create(outdir)
outdir

Designs

ENCODE

The main part of the analysis consists in producing the metagene objects for the ENCODE dataset for the GM12878 cell line. In order to do so, we must first fetch the names of the targets which have bam files available in ENCODE:

cell_line <- "GM12878"
targets <- get_encode_targets(cell_line)
targets

We need to remove the histones:

i <- !grepl("H[0-9]K", targets)
targets <- targets[i]
targets

We need to remove the control:

i <- targets != "Control" & targets != "mouse" & targets != "rabbit"
targets <- targets[i]
targets

We produce a design for each target:

# We use the log hack to avoid all the outpus from the queryEncode function
log <- capture.output({
    designs_encode <- lapply(targets, get_encode_design, cell_line)
})
names(designs_encode) <- targets
head(designs_encode)

Validation

The validation of the results will be performed on the "Variation and genetic control of chromatin architecture in humans" dataset.

Experiment descriptions

Load and clean the descriptions:

library(tidyr)
library(dplyr)
file_descriptions <- system.file("extdata/E-MTAB-3657.sdrf.txt",
                                package = "metageneInputs")
descriptions <- read.table(file_descriptions, header = TRUE, sep = "\t",
                           stringsAsFactors = FALSE)
to_keep <- c("Source.Name", "Comment..Derived.ArrayExpress.FTP.file.")
descriptions <- descriptions[,colnames(descriptions) %in% to_keep]
colnames(descriptions) <- c("individual_target", "url")
descriptions <- tbl_df(descriptions) %>%
                    separate(individual_target,
                             into = c("individual", "target"), sep = "_") %>%
                    filter(target %in% c("PU.1", "RPB2"))

Create the design:

design_validation <- data.frame(bam_files = basename(descriptions$url))
i <- descriptions[["target"]] == "PU.1"
j <- descriptions[["target"]] == "RPB2"
design_validation[["PU.1"]] <- 0
design_validation[["RPB2"]] <- 0
design_validation[["PU.1"]][i] <- 1
design_validation[["RPB2"]][j] <- 1
design_validation

Save the results:

outfile <- paste0(outdir, "/designs.RData")
save(designs_encode, design_validation, file = outfile)

Makefile

We will now produce the Makefile to download the bam files.

outfile <- paste0(outdir, "/Makefile")
fileConn<-file(outfile, open = "w")
writeLines("# Targets", fileConn)
close(fileConn)
fileConn<-file(outfile, open = "a")
writeLines("## ENCODE's files", fileConn)
bam_files <- unique(as.character(unlist(lapply(designs_encode,
                                               `[[`, "bam_files"))))
writeLines(paste0("TARGETS_ENCODE+=", bam_files), fileConn)
writeLines("", fileConn)
writeLines("## Validation's files", fileConn)
bam_files <- as.character(design_validation[["bam_files"]])
writeLines(paste0("TARGETS_VALIDATION+=", bam_files), fileConn)
writeLines("", fileConn)
writeLines("# URLs", fileConn)
writeLines("URL_ENCODE=https://www.encodeproject.org/files", fileConn)
writeLines("URL_VALIDATION=ftp://ftp.ebi.ac.uk/pub/databases/microarray/data/experiment/MTAB/E-MTAB-3657",
           fileConn)
writeLines("", fileConn)
writeLines("# Phony targets", fileConn)
writeLines(".PHONY: all", fileConn)
writeLines("all: $(TARGETS_ENCODE) $(TARGETS_VALIDATION)", fileConn)
writeLines("", fileConn)
writeLines("# Recipes", fileConn)
writeLines("ENCFF%.bam:", fileConn)
writeLines("\twget $(URL_ENCODE)/ENCFF$*/@@download/$@", fileConn)
writeLines("", fileConn)
writeLines("E-MTAB%.bam:", fileConn)
writeLines("\twget $(URL_VALIDATION)/$@", fileConn)
close(fileConn)


CharlesJB/metageneInputs documentation built on May 6, 2019, 9:58 a.m.