inst/doc/DeepBlueR.R

## ---- eval = FALSE, echo=TRUE, warning=FALSE, message=FALSE-------------------
#  install.packages("BiocManager")
#  BiocManager::install("DeepBlueR")

## ---- echo = TRUE, warning=FALSE, message=FALSE, error=FALSE------------------
library(DeepBlueR)

## ---- echo=TRUE, eval = FALSE, warning=FALSE, message=FALSE-------------------
#  deepblue_info("me")

## ---- echo=TRUE, eval=TRUE, warning=FALSE, message=FALSE----------------------
# We are selecting the experiments with terms 'H3k27AC', 'blood', and
# 'peak' in the metadata.
experiments_found = deepblue_search(
    keyword="'H3k27AC' 'blood' 'peak'", type="experiments")

custom_table = do.call("rbind", apply(experiments_found, 1, function(experiment){
  experiment_id = experiment[1]
  # Obtain the information about the experiment_id
  info = deepblue_info(experiment_id)

  # Print the experiment name, project, biosource, and epigenetic mark.
  with(info, { data.frame(name = name, project = project,
    biosource = sample_info$biosource_name, epigenetic_mark = epigenetic_mark)
      })
}))
  head(custom_table)

## ---- echo=TRUE, eval=TRUE, warning=FALSE, message=FALSE----------------------
experiments = deepblue_list_experiments(type="peaks", epigenetic_mark="H3K4me3",
    biosource=c("inflammatory macrophage", "macrophage"),
    project="BLUEPRINT Epigenome")

## ---- echo=TRUE, eval=TRUE, warning=FALSE, message=FALSE----------------------
info = deepblue_info("e30000")
print(info$extra_metadata$file_url)

## ---- echo=TRUE, eval=TRUE, warning=FALSE, message=FALSE----------------------
query_id = deepblue_select_experiments(
    experiment_name=c("BL-2_c01.ERX297416.H3K27ac.bwa.GRCh38.20150527.bed",
        "S008SGH1.ERX406923.H3K27ac.bwa.GRCh38.20150728.bed"))
# Count how many regions where selected
request_id = deepblue_count_regions(query_id=query_id)
# Download the request data as soon as processing is finished
requested_data = deepblue_download_request_data(request_id=request_id)
print(paste("The selected experiments have", requested_data, "regions."))

## ---- echo=TRUE, eval=TRUE, warning=FALSE, message=FALSE----------------------
query_id = deepblue_select_experiments (
    experiment_name = c("BL-2_c01.ERX297416.H3K27ac.bwa.GRCh38.20150527.bed",
        "S008SGH1.ERX406923.H3K27ac.bwa.GRCh38.20150728.bed"),
        chromosome="chr1", start=0, end=50000000)

# Retrieve the experiments data (The @NAME meta-column is used to include the
# experiment name and @BIOSOURCE for experiment's biosource
request_id = deepblue_get_regions(query_id=query_id,
    output_format="CHROMOSOME,START,END,SIGNAL_VALUE,PEAK,@NAME,@BIOSOURCE")
regions = deepblue_download_request_data(request_id=request_id)
regions

## ---- echo=TRUE, eval=TRUE, warning=FALSE, message=FALSE----------------------
samples = deepblue_list_samples(
    biosource="myeloid cell",
    extra_metadata = list("source" = "BLUEPRINT Epigenome"))
samples_ids = deepblue_extract_ids(samples)
query_id = deepblue_select_regions(genome="GRCh38", sample=samples_ids,
    chromosome="chr1", start=0, end=50000)
request_id = deepblue_get_regions(query_id=query_id,
    output_format="CHROMOSOME,START,END,@NAME,@SAMPLE_ID,@BIOSOURCE")
regions = deepblue_download_request_data(request_id=request_id)
head(regions,1)

## ---- echo=TRUE, eval=TRUE, warning=FALSE, message=FALSE----------------------
query_id = deepblue_select_experiments(
    experiment_name = c("BL-2_c01.ERX297416.H3K27ac.bwa.GRCh38.20150527.bed",
        "S008SGH1.ERX406923.H3K27ac.bwa.GRCh38.20150728.bed"),
    chromosome="chr1", start=0, end=50000000)
query_id_filter_signal = deepblue_filter_regions(
    query_id=query_id, field="SIGNAL_VALUE", operation=">",
    value="10", type="number")
query_id_filters = deepblue_filter_regions(
    query_id=query_id_filter_signal, field="PEAK", operation=">",
    value="1000", type="number")
request_id = deepblue_get_regions(query_id=query_id_filters,
    output_format="CHROMOSOME,START,END,SIGNAL_VALUE,PEAK,@NAME,@BIOSOURCE")
regions = deepblue_download_request_data(request_id=request_id)
regions

## ---- echo=TRUE, eval=TRUE, warning=FALSE, message=FALSE----------------------
query_id = deepblue_select_experiments(
    experiment_name = c("BL-2_c01.ERX297416.H3K27ac.bwa.GRCh38.20150527.bed",
        "S008SGH1.ERX406923.H3K27ac.bwa.GRCh38.20150728.bed"),
    chromosome="chr1", start=0, end=50000000)
promoters_id = deepblue_select_annotations(annotation_name="promoters",
    genome="GRCh38", chromosome="chr1")
intersect_id = deepblue_intersection(
    query_data_id=query_id, query_filter_id=promoters_id)
request_id = deepblue_get_regions(
    query_id=intersect_id,
    output_format="CHROMOSOME,START,END,SIGNAL_VALUE,PEAK,@NAME,@BIOSOURCE")
regions = deepblue_download_request_data(request_id=request_id)
regions

## ---- message=FALSE, error=FALSE, warning=FALSE-------------------------------
library(Gviz)
atrack <- AnnotationTrack(regions, name = "Intersecting regions",
    group = regions$`@BIOSOURCE`, genome="hg38")
gtrack <- GenomeAxisTrack()
itrack <- IdeogramTrack(genome = "hg38", chromosome = "chr1")
plotTracks(list(itrack, atrack, gtrack), groupAnnotation="group", fontsize=18,
           background.panel = "#FFFEDB", background.title = "darkblue")

## ---- echo=TRUE, eval=TRUE, warning=FALSE, message=FALSE----------------------
query_id = deepblue_select_experiments(
    experiment_name = c("BL-2_c01.ERX297416.H3K27ac.bwa.GRCh38.20150527.bed",
        "S008SGH1.ERX406923.H3K27ac.bwa.GRCh38.20150728.bed"),
    chromosome="chr1", start=0, end=50000000)
query_id_filter_signal = deepblue_filter_regions(query_id=query_id,
    field="SIGNAL_VALUE", operation=">", value="10", type="number")
query_id_filters = deepblue_filter_regions(query_id=query_id_filter_signal,
    field="PEAK", operation=">", value="1000", type="number")
query_id_filter_length = deepblue_filter_regions (query_id=query_id_filters,
    field="@LENGTH", operation="<", value="2000", type="number")
request_id = deepblue_get_regions(query_id=query_id_filter_length,
    output_format="CHROMOSOME,START,END,@NAME,@BIOSOURCE,@LENGTH,@SEQUENCE")
regions = deepblue_download_request_data(request_id=request_id)
head(regions, 1)

## ---- echo=TRUE, eval=TRUE, warning=FALSE, message=FALSE----------------------
tataa_regions = deepblue_find_motif(motif="TATAAA", genome="GRCh38", chromosomes="chr1")
query_id = deepblue_select_experiments(
    experiment_name= c("BL-2_c01.ERX297416.H3K27ac.bwa.GRCh38.20150527.bed",
        "S008SGH1.ERX406923.H3K27ac.bwa.GRCh38.20150728.bed"),
    chromosome="chr1", start=0, end=50000000)
overlapped = deepblue_intersection(query_data_id=query_id,
                                   query_filter_id=tataa_regions)
request_id=deepblue_get_regions(overlapped,
    "CHROMOSOME,START,END,SIGNAL_VALUE,PEAK,@NAME,@BIOSOURCE,@LENGTH,@SEQUENCE,@PROJECT")
regions = deepblue_download_request_data(request_id=request_id)
head(regions, 3)

## ---- echo=TRUE, eval=TRUE, warning=FALSE, message=FALSE----------------------
experiment_data = deepblue_select_experiments(
  "DG-75_c01.ERX297417.H3K27ac.bwa.GRCh38.20150527.bed")
fmt = "CHROMOSOME,START,END,@LENGTH,@COUNT.MOTIF(C),@COUNT.MOTIF(G),@COUNT.MOTIF(CG),@COUNT.MOTIF(GC),@SEQUENCE"
request_id=deepblue_get_regions(experiment_data, fmt)
regions = deepblue_download_request_data(request_id=request_id)
head(regions, 3)

## ---- echo=TRUE, eval=FALSE, warning=FALSE, message=FALSE---------------------
#  q_genes = deepblue_select_genes(genes="RP11-34P13", gene_model="gencode v23")
#  q_filter = deepblue_filter_regions(query_id=q_genes,
#      field="@GENE_ATTRIBUTE(gene_type)", operation="==",
#      value="lincRNA", type="string")
#  request_id=deepblue_get_regions(q_filter, "CHROMOSOME,START,END,GTF_ATTRIBUTES")
#  regions = deepblue_download_request_data(request_id=request_id)
#  regions

## ---- echo=TRUE, eval=TRUE, warning=FALSE, message=FALSE----------------------
query_id = deepblue_select_experiments (
    experiment=c("GC_T14_10.CPG_methylation_calls.bs_call.GRCh38.20160531.wig"),
    chromosome="chr1", start=0, end=50000000)
cpg_islands = deepblue_select_annotations(annotation_name="CpG Islands",
    genome="GRCh38", chromosome="chr1", start=0, end=50000000)
# Aggregate
overlapped = deepblue_aggregate (data_id=query_id, ranges_id=cpg_islands,
    column="VALUE" )

# Retrieve the experiments data (The @NAME meta-column is used to include
# the experiment name and @BIOSOURCE for experiment's biosource
request_id = deepblue_get_regions(query_id=overlapped,
    output_format=
      "CHROMOSOME,START,END,@AGG.MIN,@AGG.MAX,@AGG.MEAN,@AGG.VAR")
regions = deepblue_download_request_data(request_id=request_id)

## ---- echo=TRUE, eval=TRUE, warning=FALSE, message=FALSE----------------------
hsc_children = deepblue_get_biosource_children("hematopoietic stem cell")

hsc_children_name = deepblue_extract_names(hsc_children)

hsc_children_samples = deepblue_list_samples(
    biosource = hsc_children_name,
    extra_metadata = list(source="BLUEPRINT Epigenome"))

hsc_samples_ids = deepblue_extract_ids(hsc_children_samples)

# Note that BLUEPRINT uses Ensembl Gene IDs
gene_exprs_query = deepblue_select_expressions(
    expression_type = "gene",
    sample_ids = hsc_samples_ids,
    identifiers = c("ENSG00000074771.3", "ENSG00000188747.7", "ENSG00000086991.11"),
    gene_model = "gencode v22")

request_id = deepblue_get_regions(
    query_id = gene_exprs_query,
    output_format ="@GENE_NAME(gencode v22),CHROMOSOME,START,END,FPKM,@BIOSOURCE")

regions = deepblue_download_request_data(request_id = request_id)
regions

## ---- echo=TRUE, eval=TRUE, warning=FALSE, message=FALSE----------------------
# Selecting the data from 2 experiments:
#    GC_T14_10.CPG_methylation_calls.bs_call.GRCh38.20160531.wig
# As we already know the experiments names, we keep all others fields empty.
# We are selecting all regions of chromosome 1
query_id = deepblue_select_experiments(
    experiment=c("GC_T14_10.CPG_methylation_calls.bs_call.GRCh38.20160531.wig"),
    chromosome="chr1")

#  Tiling regions of 100.000 base pairs
tiling_id = deepblue_tiling_regions(size=100000,
    genome="GRCh38", chromosome="chr1")

# Aggregate
overlapped = deepblue_aggregate (data_id=query_id,
    ranges_id=tiling_id, column="VALUE")

# Retrieve the experiments data (The @NAME meta-column is used to include the
# experiment name and @BIOSOURCE for experiment's biosource
request_id = deepblue_get_regions(query_id=overlapped,
    output_format="CHROMOSOME,START,END,@AGG.MEAN,@AGG.SD")

regions = deepblue_download_request_data(request_id=request_id)
regions

## -----------------------------------------------------------------------------
library(ggplot2)
plot_data <- as.data.frame(regions)
plot_data[,grepl("X.", colnames(plot_data))] <-
    apply(plot_data[,grepl("X.", colnames(plot_data))], 2, as.numeric)
AGG.plot <- ggplot(plot_data, aes(start)) +
    geom_ribbon(aes(ymin = X.AGG.MEAN - (X.AGG.SD / 2),
        ymax = X.AGG.MEAN + (X.AGG.SD / 2)), fill = "grey70") +
    geom_line(aes(y = X.AGG.MEAN))
print(AGG.plot)

## ---- echo=TRUE, eval=TRUE, warning=FALSE, message=FALSE----------------------
# Select the RP11-34P13 gene locations from gencode v23
q_genes = deepblue_select_genes(
    genes=
        c("RNU6-1100P", "CICP7", "MRPL20", "ANKRD65",
            "HES2", "ACOT7", "HES3", "ICMT"), gene_model="gencode v19")

# Obtain the regions that starts 2500 bases pair before the regions start and
# have 2000 base pairs.
# The 4th argument inform that DeepBlue must consider the region strand
# (column STRAND) to calculate the new region
before_flank_id = deepblue_flank(query_id=q_genes,
     start=-2500, length=2000, use_strand=TRUE)

# Obtain the regions that starts 1500 bases pair after the
# regions end and have 500 base pairs.
# The 4th argument inform that DeepBlue must consider the
# region strand (column STRAND) to calculate the new region
after_flank_id = deepblue_flank(query_id=q_genes,
    start=1500, length=500, use_strand=TRUE)

# Merge both flanking regions set and genes set
flank_merge_id = deepblue_merge_queries(
    query_a_id =before_flank_id, query_b_id=after_flank_id)
all_merge_id = deepblue_merge_queries(
    query_a_id=q_genes, query_b_id=flank_merge_id)

# Request the regions
request_id = deepblue_get_regions(query_id=all_merge_id,
    output_format="CHROMOSOME,START,END,STRAND,@LENGTH")

regions = deepblue_download_request_data(request_id=request_id)
regions

## ---- echo=TRUE, eval=TRUE, warning=FALSE, message=FALSE----------------------
# Select the RP11-34P13 gene locations from gencode v23

# Selecting the data from 2 experiments:
#    GC_T14_10.CPG_methylation_calls.bs_call.GRCh38.20160531.wig
# As we already know the experiments names, we keep all others fields empty.
# We are selecting all regions of chromosome 1
query_id = deepblue_select_experiments(
    experiment="GC_T14_10.CPG_methylation_calls.bs_call.GRCh38.20160531.wig",
    chromosome="chr1")

# Select the CpG Islands annotation from GRCh38
cpg_islands = deepblue_select_annotations(
    annotation="CpG Islands", genome="GRCh38", chromosome="chr1")

# Aggregate
overlapped = deepblue_aggregate(
    data_id=query_id, ranges_id=cpg_islands, column="VALUE")

# Select the aggregated regions that aggregated at least one region from the
# selected experiments (@AGG.COUNT > 0)
filtered = deepblue_filter_regions(query_id=overlapped,
    field="@AGG.COUNT", operation=">", value="0", type="number")

# We remove all regions where the aggregation mean is zero.
filtered_zeros = deepblue_filter_regions(query_id=filtered,
    field="@AGG.MEAN", operation="!=", value="0.0", type="number")

# Retrieve the experiments data (The @NAME meta-column is used to include the
# experiment name and @BIOSOURCE for experiment's biosource
request_id = deepblue_get_regions(query_id=filtered_zeros,
    output_format=
    "CHROMOSOME,START,END,@CALCULATED(return math.log(value_of('@AGG.MEAN'))),@AGG.MEAN,@AGG.COUNT")

regions = deepblue_download_request_data(request_id=request_id)

# We have to perform a manual conversion because the
# package can't know the type for calculated columns
regions$`@CALCULATED(return math.log(value_of('@AGG.MEAN')))` =
    as.numeric(regions$`@CALCULATED(return math.log(value_of('@AGG.MEAN')))`)

head(regions, 5)

## ---- warning=FALSE, error=FALSE----------------------------------------------
library(Gviz)
atrack <- AnnotationTrack(regions,
    name = "CpGs", group = regions$`@BIOSOURCE`, genome="hg38")
gtrack <- GenomeAxisTrack()
itrack <- IdeogramTrack(genome = "hg38", chromosome = "chr1")
dtrack <- DataTrack(regions,
    data="@AGG.MEAN", name = "Log of average methylation value")
plotTracks(list(itrack, atrack, dtrack, gtrack), type="histogram", fontsize=18,
           background.panel = "#FFFEDB", background.title = "darkblue")

## ---- echo=TRUE, eval=TRUE, warning=FALSE, message=FALSE----------------------
experiments =
    c("GC_T14_10.CPG_methylation_calls.bs_call.GRCh38.20160531.wig",
        "C003N351.CPG_methylation_calls.bs_call.GRCh38.20160531.wig",
        "C005VG51.CPG_methylation_calls.bs_call.GRCh38.20160531.wig",
        "S002R551.CPG_methylation_calls.bs_call.GRCh38.20160531.wig",
        "NBC_NC11_41.CPG_methylation_calls.bs_call.GRCh38.20160531.wig",
        "bmPCs-V156.CPG_methylation_calls.bs_call.GRCh38.20160531.wig",
        "S00BS451.CPG_methylation_calls.bs_call.GRCh38.20160531.wig",
        "S00D1DA1.CPG_methylation_calls.bs_call.GRCh38.20160531.wig",
        "S00D39A1.CPG_methylation_calls.bs_call.GRCh38.20160531.wig")

experiments_columns = list()
for (experiment_name in experiments) {
    experiments_columns[[experiment_name]] = "VALUE"
}

cpgs = deepblue_select_annotations(
    annotation_name="Cpg Islands",
    chromosome="chr22", start=0, end=18000000, genome="GRCh38")

request_id = deepblue_score_matrix(
    experiments_columns=experiments_columns,
    aggregation_function="mean", aggregation_regions_id=cpgs)

score_matrix = deepblue_download_request_data(request_id=request_id)
head(score_matrix, 5)

## ---- fig.width = 11, warning=FALSE, echo=TRUE, error=FALSE, eval=TRUE--------
library(ggplot2)
score_matrix_plot = tidyr::gather(score_matrix,
    "experiment", "methylation", -CHROMOSOME, -START, -END)
score_matrix_plot$START <- as.factor(score_matrix_plot$START)
ggplot(score_matrix_plot, aes(x=START, y=experiment, fill=methylation)) +
    geom_tile() +
    theme(axis.text.x=element_text(angle=-90))

## ---- eval = FALSE------------------------------------------------------------
#  deepblue_export_tab(score_matrix, file.name = "my_score_matrix")

## ---- eval=FALSE--------------------------------------------------------------
#  request_id = deepblue_get_regions(query_id=overlapped,
#      output_format="CHROMOSOME,START,END,@AGG.MEAN,@AGG.SD")
#  
#  regions = deepblue_download_request_data(request_id=request_id)
#  deepblue_export_bed(regions,
#                      file.name = "my_tiling_regions",
#                      score.field = "@AGG.MEAN")

## ---- eval=FALSE--------------------------------------------------------------
#  exp_id <- deepblue_name_to_id(
#      "GC_T14_10.CPG_methylation_calls.bs_call.GRCh38.20160531.wig",
#      collection = "experiments")$id
#  
#  deepblue_export_meta_data(exp_id, file.name = "GC_T14")

## ---- eval=FALSE--------------------------------------------------------------
#  deepblue_export_meta_data(list("e30035", "e30036"),
#  file.name = "test_export")

## ---- eval=FALSE--------------------------------------------------------------
#  experiments = deepblue_list_experiments(type="peaks", epigenetic_mark="H3K4me3",
#      biosource=c("inflammatory macrophage", "macrophage"),
#      project="BLUEPRINT Epigenome")
#  experiment_names = deepblue_extract_names(experiments)
#  
#  request_ids = foreach(experiment = experiment_names) %do%{
#    query_id = deepblue_select_experiments(experiment_name = experiment,
#                                          chromosome = "chr21")
#  
#    request_id = deepblue_get_regions(query_id =query_id,
#      output_format = "CHROMOSOME,START,END")
#  }
#  request_data = deepblue_batch_export_results(request_ids,
#                               target.directory = "BLUEPRINT macrophages chr21")

## ---- echo=TRUE, eval = TRUE, warning=FALSE, message=FALSE--------------------
deepblue_options()

## ---- echo=TRUE, eval = TRUE, warning=FALSE, message=FALSE--------------------
deepblue_options(do_not_cache = TRUE)

## ---- echo=TRUE, eval = FALSE, warning=FALSE, message=FALSE-------------------
#  deepblue_options(user_key = "my_user_key")

## ---- echo=TRUE, eval = FALSE, warning=FALSE, message=FALSE-------------------
#  deepblue_reset_options()

## ---- echo=TRUE, eval = FALSE, warning=FALSE, message=FALSE-------------------
#  deepblue_cache_status()

## ---- echo=TRUE, eval = FALSE, warning=FALSE, message=FALSE-------------------
#  deepblue_list_cached_requests()

## ---- echo=TRUE, eval = FALSE, warning=FALSE, message=FALSE-------------------
#  deepblue_delete_request_from_cache("r123")

## ---- echo=TRUE, eval = FALSE, warning=FALSE, message=FALSE-------------------
#  deepblue_clear_cache()

## ----echo=TRUE, eval=TRUE, warning=FALSE, message=FALSE-----------------------
library(foreach)

chromosomes_mm10 <- deepblue_extract_ids(deepblue_chromosomes(genome = "mm10"))

request_ids <- foreach(chromosome = chromosomes_mm10, .combine = c) %do% {
    
    tiling_regions = deepblue_tiling_regions(
        size=100000, genome="mm10", chromosome=chromosome)
    
    deepblue_score_matrix(
        experiments_columns = list(ENCFF721EKA="VALUE", ENCFF781VVH="VALUE"),
        aggregation_function = "mean",
        aggregation_regions_id = tiling_regions
    )
}


## ----echo=TRUE, eval=TRUE, warning=FALSE, message=FALSE-----------------------
list_of_score_matrices <- deepblue_batch_export_results(request_ids)

## ----echo=TRUE, eval=TRUE, warning=FALSE, message=FALSE-----------------------
library(data.table)
genome_wide_score_matrix <- data.table::rbindlist(list_of_score_matrices,
                                                  use.names = TRUE)
genome_wide_score_matrix

## ----dependencies, message=FALSE, warning=FALSE-------------------------------
library(DeepBlueR)
library(gplots)
library(RColorBrewer)
library(matrixStats)
library(stringr)

## ----echo=TRUE, eval=TRUE, warning=FALSE, message=FALSE-----------------------
blueprint_DNA_meth <- deepblue_list_experiments(genome = "GRCh38",
                          epigenetic_mark = "DNA Methylation",
                          technique = "Bisulfite-Seq",
                          project = "BLUEPRINT EPIGENOME")

blueprint_DNA_meth

## ----echo=TRUE, eval=TRUE, warning=FALSE, message=FALSE-----------------------
blueprint_DNA_meth <- blueprint_DNA_meth[grep("CPG_methylation_calls.bs_call",
    deepblue_extract_names(blueprint_DNA_meth)),]

blueprint_DNA_meth

## ----echo=TRUE, eval=TRUE, warning=FALSE, message=FALSE-----------------------
exp_columns <- list(nrow(blueprint_DNA_meth))

for(i in 1:nrow(blueprint_DNA_meth)){
    exp_columns[[i]] <- "VALUE"
}
names(exp_columns) <- deepblue_extract_names(blueprint_DNA_meth)

## ----echo=TRUE, eval=TRUE, warning=FALSE, message=FALSE-----------------------
exp_columns <- deepblue_select_column(blueprint_DNA_meth, "VALUE")

## ----echo=TRUE, eval=TRUE, warning=FALSE, message=FALSE-----------------------
#list all available chromosomes in GRCh38
chromosomes_GRCh38 <- deepblue_extract_ids(
    deepblue_chromosomes(genome = "GRCh38")
)

#keep only the essential ones
chromosomes_GRCh38 <- 
    grep(pattern = "chr([0-9]{1,2}|X)$", chromosomes_GRCh38, value = TRUE)

#we split the request by chromosome to avoid hitting the memory limit of 
#DeepBlue
blueprint_regulatory_regions <- 
    foreach(chr = chromosomes_GRCh38, .combine = c) %do%
        deepblue_select_annotations(
            annotation_name = "Blueprint Ensembl Regulatory Build",
            chromosome = chr,
            genome = "GRCh38"
        )

blueprint_regulatory_regions

## ---- eval=FALSE--------------------------------------------------------------
#  deepblue_select_annotations(annotation_name = "Cpg Islands",
#                              genome = "GRCh38")

## ----echo=TRUE, eval=TRUE, warning=FALSE, message=FALSE-----------------------
deepblue_list_annotations(genome = "GRCh38")

## ---- eval=FALSE--------------------------------------------------------------
#  tiling_regions <- deepblue_tiling_regions(size=5000,
#                                            genome="GRCh38")

## ----echo=TRUE, eval=TRUE, warning=FALSE, message=FALSE-----------------------
request_ids <- foreach(query_id = blueprint_regulatory_regions,
                       .combine = c) %do% 
    deepblue_score_matrix(
        experiments_columns = exp_columns,
        aggregation_function = "mean",
        aggregation_regions_id = query_id)

request_ids

## ----eval=TRUE, echo=TRUE, message=FALSE, warning=FALSE-----------------------
foreach(request = request_ids, .combine = c) %do% {
  deepblue_info(request)$state
}

## ----echo=TRUE, eval=TRUE, warning=FALSE, message=FALSE-----------------------
score_matrix <- data.table::rbindlist(
    deepblue_batch_export_results(request_ids),
    use.names = TRUE)
score_matrix[,1:5, with=FALSE]

## ----echo=TRUE, eval=TRUE, warning=FALSE, message=FALSE-----------------------
getPalette <- colorRampPalette(brewer.pal(9, "Set1"))

## ----echo=TRUE, eval=TRUE, warning=FALSE, message=FALSE-----------------------
experiments_info <- deepblue_info(deepblue_extract_ids(blueprint_DNA_meth))

## ----echo=TRUE, eval=TRUE, warning=FALSE, message=FALSE-----------------------
head(experiments_info[[1]], 10)

## ----echo=TRUE, eval=TRUE, warning=FALSE, message=FALSE-----------------------
biosource <- unlist(lapply(experiments_info, function(x){ x$sample_info$biosource_name}))
head(biosource)

## ----echo=TRUE, eval=TRUE, warning=FALSE, message=FALSE-----------------------
biosource <- str_replace_all(biosource, "-positive", "+")
biosource <- str_replace_all(biosource, "-negative", "-")

## ----echo=TRUE, eval=TRUE, warning=FALSE, message=FALSE-----------------------
biosource <- str_replace(biosource, ", terminally differentiated", "")

## ----echo=TRUE, eval=TRUE, warning=FALSE, message=FALSE-----------------------
color_map <- data.frame(biosource = unique(biosource),
                        color = getPalette(length(unique(biosource))))

head(color_map)

## ----echo=TRUE, eval=TRUE, warning=FALSE, message=FALSE-----------------------
exp_names <- unlist(lapply(experiments_info, function(x){ x$name}))

biosource_colors <- data.frame(name = exp_names, biosource = biosource)
biosource_colors <- dplyr::left_join(biosource_colors, color_map, by = "biosource")
head(biosource_colors)

## ----echo=TRUE, eval=TRUE, warning=FALSE, message=FALSE-----------------------
color_vector <- as.character(biosource_colors$color)
names(color_vector) <-  biosource_colors$biosource
head(color_vector)

## ----echo=TRUE, eval=TRUE, warning=FALSE, message=FALSE-----------------------
filtered_score_matrix <- as.matrix(score_matrix[,-c(1:3), with=FALSE])
head(filtered_score_matrix[,1:3])

## ----echo=TRUE, eval=TRUE, warning=FALSE, message=FALSE-----------------------
message("regions before: ", nrow(filtered_score_matrix))
filtered_score_matrix_rowVars <- rowVars(filtered_score_matrix, na.rm = TRUE)
filtered_score_matrix <- filtered_score_matrix[which(filtered_score_matrix_rowVars > 0.05),]
message("regions after: ", nrow(filtered_score_matrix))

## ----echo=TRUE, eval=TRUE, warning=FALSE, message=FALSE-----------------------
message("regions before: ", nrow(filtered_score_matrix))
filtered_score_matrix <- filtered_score_matrix[which(complete.cases(filtered_score_matrix)),]
message("regions after: ", nrow(filtered_score_matrix))

## ----echo=TRUE, eval=TRUE, warning=FALSE, message=FALSE-----------------------
filtered_score_matrix <- filtered_score_matrix[,exp_names]

## ---- fig.width=11, echo=TRUE, eval=TRUE, warning=FALSE, message=FALSE--------
 heatmap.2(filtered_score_matrix,labRow = NA, labCol = NA,
          trace = "none", ColSideColors = color_vector,
          hclust=function(x) hclust(x,method="complete"),
          distfun=function(x) as.dist(1-cor(t(x), method = "pearson")),
          Rowv = TRUE, dendrogram = "column",
          key.xlab = "beta value", denscol = "black", keysize = 1.5,
          key.title = NA)


## ---- fig.height=6, echo=TRUE, eval=TRUE, warning=FALSE, message=FALSE--------
  plot.new()

  legend(x = 0, y = 1,
       legend = color_map$biosource,
       col = as.character(color_map$color),
       text.width = 0.6,
       lty= 1,
       lwd = 6,
       cex = 0.7,
       y.intersp = 0.7,
       x.intersp = 0.7,
       inset=c(-0.21,-0.11))


## ---- eval=FALSE--------------------------------------------------------------
#  demo(package = "DeepBlueR")

## ---- eval=FALSE--------------------------------------------------------------
#  demo("use_case1", package = "DeepBlueR")

Try the DeepBlueR package in your browser

Any scripts or data that you put into this service are public.

DeepBlueR documentation built on Nov. 8, 2020, 6:25 p.m.