R/SEbuilder.R

Defines functions bindRowranges450k replaceRownames buildPancanSE featValMap featIDMapper

Documented in buildPancanSE featIDMapper replaceRownames

#' @import GenomicFeatures
#' @import magrittr 
#' @export
magrittr::`%>%`
#' @importFrom dplyr tbl tbl_df select filter summarise n group_by src_tbls tbl_vars select_ group_by_
#' @export
dplyr::`filter`
#' @export
dplyr::`tbl`
#' @export
dplyr::`select`
#' define assay-specific feature names in a character vector
#' @import bigrquery
#' @export dbListTables
#' @note We may want to use Symbol instead of Entrez when retrieving
#' expression data.  The value of this function is supplied as a
#' default for \code{\link{buildPancanSE}}'s featIDMap parameter, 
#' and alternatives can
#' be selected by passing similarly named vectors in featIDMap.
#' @examples
#' featIDMapper()
#' @export
featIDMapper <- function() {
  # Field used to label features (rownames), keyed by assay name.
  assayNames <- c("RNASeqv2", "RPPA_clean", "meth27k",
                  "meth450k", "methMerged", "miRNA")
  idFields <- c("Entrez", "Protein", "ID", "ID", "ID", "ID")
  names(idFields) <- assayNames
  idFields
}
featValMap <- function() {
  # Field holding the assay values, keyed by assay name.
  valueFields <- c("normalized_count", "Value", "Beta",
                   "Beta", "Beta", "miRNAexpr")
  names(valueFields) <- c("RNASeqv2", "RPPA_clean", "meth27k",
                          "meth450k", "methMerged", "miRNA")
  valueFields
}
#' helper for SummarizedExperiment construction from pancan
#' @param bq instance of BigQueryConnection for pancancer-atlas.Annotated Dataset
#' @param acronym character(1) 'cohort' label, e.g., 'BLCA'
#' @param assay character(1) element from names(BiocOncoTK::annotTabs), e.g., 'meth450k'.
#' If `assay == "mc3_MAF"` an error is thrown as the mutation data are
#' inconsistently annotated; the message produced directs the user to
#' `mc3toGR`.
#' @param sampType character(1) element from 
#' BiocOncoTK::pancan_sampTypeMap$"SampleTypeLetterCode", 
#' e.g., 'TP' for Primary solid Tumor samples,
#' or 'TB' for peripheral blood sample from primary blood derived cancer
#' @param subjectIDName character(1) field name for subject identifier
#' @param seTransform a function that accepts a SummarizedExperiment and
#' returns a SummarizedExperiment; it is applied to the fully assembled
#' object just before it is returned.  Useful for feature name remapping,
#' defaults to force (does nothing)
#' @param bindMethRowranges logical(1) if true and assay is meth27k
#' or meth450k, annotation from FDb.InfiniumMethylation.hg19
#' and EnsDb.Hsapiens.v75 is obtained for available features
#' and bound into the rowRanges component of returned object
#' @param featIDMap a named character() vector defining, for each
#' assay type, what field should be used to label features in rownames.
#' @note Note that pancancer-atlas is distinguished from TCGA by the presence of more
#' sample types.  The default type is 'TP' for primary solid tumor.
#' Codes and their interpretations are available in 
#' BiocOncoTK::pancan_sampTypeMap.
#' @return SummarizedExperiment, with metadata on acronym, assay,
#' and sampleType propagated; if the assay is a methylation
#' assay and bindMethRowranges is TRUE, a RangedSummarizedExperiment
#' is returned.  The value is whatever \code{seTransform} returns when
#' given that object.
#' @examples
#' if (interactive() && Biobase::testBioCConnection()) {
#'    billco = Sys.getenv("CGC_BILLING")
#'    if (nchar(billco)>0) {
#'      bq = pancan_BQ()
#'      methSE_BLCA = try(buildPancanSE(bq))
#'      methSE_BLCA
#'    }
#' }
#' @export
buildPancanSE <- function(bq, acronym = 'BLCA',
  assay = 'meth450k', sampType = 'TP', 
  subjectIDName = "ParticipantBarcode", seTransform=force,
  bindMethRowranges = TRUE, featIDMap=featIDMapper()) {
  # Cheap argument checks run before the optional-dependency check so the
  # mc3toGR redirect is reported whether or not restfulSE is installed.
  stopifnot(is.character(assay), length(assay) == 1L)
  if (assay == "mc3_MAF") stop("please use mc3toGR for mutation data")
  stopifnot(is.function(seTransform))
  if (!requireNamespace("restfulSE", quietly = TRUE))
    stop("install restfulSE to use this function")
  stopifnot(assay %in% names(BiocOncoTK::annotTabs))
  stopifnot(is(bq, "BigQueryConnection"))
  stopifnot(assay %in% names(featIDMap))

  ans <- restfulSE::pancan_SE(bq, colDFilterValue = acronym,
    assayDataTableName = BiocOncoTK::annotTabs[ assay ],
    assayFeatureName = featIDMap[ assay ], assaySampleTypeCode = sampType,
    subjectIDName = subjectIDName,
    tumorFieldName = "Study", tumorFieldValue = acronym,
    assayValueFieldName = featValMap()[ assay ])

  # Record the query parameters alongside any metadata already present.
  if (is.list(metadata(ans))) {
    metadata(ans) <- c(metadata(ans), acronym = acronym,
      assay = assay, sampType = sampType)
  }

  # Scalar condition: use short-circuit `&&`, not elementwise `&`.
  if (assay %in% c("meth27k", "meth450k") && bindMethRowranges) {
    ans <- bindRowranges450k(ans)
  }

  # Honour the documented seTransform hook (the default `force` is identity).
  seTransform(ans)
}

#' map rownames of an SE to another vocabulary
#' @param se SummarizedExperiment instance
#' @param sourceVocab character(1) must be a keytype of org.Hs.eg.db, defaults to 'ENTREZID'
#' @param targetVocab character(1) must be a column of org.Hs.eg.db
#' @return \code{se} restricted to the rows whose names are keys of type
#' \code{sourceVocab} in org.Hs.eg.db, with rownames replaced by the
#' \code{targetVocab} values that \code{AnnotationDbi::mapIds} returns for them
#' @export
replaceRownames <- function(se, sourceVocab="ENTREZID", targetVocab="SYMBOL") {
  if (!requireNamespace("org.Hs.eg.db", quietly = TRUE))
    stop("install org.Hs.eg.db to use replaceRownames")
  if (!requireNamespace("AnnotationDbi", quietly = TRUE))
    stop("install AnnotationDbi to use replaceRownames")
  rn <- rownames(se)
  ks <- AnnotationDbi::keys(org.Hs.eg.db::org.Hs.eg.db, keytype=sourceVocab)
  # A logical mask removes every row whose name is not a known key, including
  # repeated names (negative match() indexing would drop only the first hit).
  keep <- rn %in% ks
  if (!all(keep)) {
    message(sum(!keep), " rows unmapped to ", sourceVocab, ", dropped")
    se <- se[keep, ]
  }
  # NOTE(review): mapIds is called with its default multiVals handling, and
  # keys lacking a targetVocab entry presumably yield NA rownames -- confirm.
  rownames(se) <- AnnotationDbi::mapIds(org.Hs.eg.db::org.Hs.eg.db,
    keys=rownames(se), keytype=sourceVocab, column=targetVocab)
  se
}


# Bind gene annotation into the rowRanges of a methylation
# SummarizedExperiment.
#
# se             object whose rownames are used to index the features of
#                platformObject (assumed to be probe identifiers -- TODO confirm)
# platformObject source of probe locations, read via GenomicFeatures::features
# geneObject     source of gene ranges, read via GenomicFeatures::genes
#
# Returns se with rowRanges set to, for each row, the width-1 range of the
# nearest gene, carrying gene_id, gene_name and gene_biotype and named by
# the row's feature name.  Note that the stored coordinates are those of the
# gene anchor, not of the probe itself.
bindRowranges450k <- function(se,
   platformObject = FDb.InfiniumMethylation.hg19::FDb.InfiniumMethylation.hg19,
   geneObject = EnsDb.Hsapiens.v75::EnsDb.Hsapiens.v75) {
  # The default arguments are evaluated lazily, so these checks run before
  # either annotation package is actually touched.
  if (!requireNamespace("FDb.InfiniumMethylation.hg19", quietly = TRUE))
    stop("please install FDb.InfiniumMethylation.hg19 to use bindRowranges450k")
  if (!requireNamespace("EnsDb.Hsapiens.v75", quietly = TRUE))
    stop("please install EnsDb.Hsapiens.v75 to use bindRowranges450k")

  # Probe locations, stripped of their metadata columns and reordered to
  # match the rows of se (errors if a rowname is absent from the platform).
  probes <- GenomicFeatures::features(platformObject)
  S4Vectors::mcols(probes) <- NULL
  f2use <- probes[rownames(se)]

  # Gene ranges, relabelled so that genome and seqlevel style agree with
  # the probe ranges, then collapsed to width 1 (resize's default anchoring).
  gmap <- GenomicFeatures::genes(geneObject)
  GenomeInfoDb::genome(gmap) <- "hg19"
  GenomeInfoDb::seqlevelsStyle(gmap) <- "UCSC"
  gmap <- IRanges::resize(gmap, 1)

  # One nearest gene per probe, labelled with the probe name.
  gmap <- gmap[IRanges::nearest(f2use, gmap)]
  names(gmap) <- names(f2use)
  SummarizedExperiment::rowRanges(se) <- gmap[, c("gene_id", "gene_name",
    "gene_biotype")]
  se
}

Try the BiocOncoTK package in your browser

Any scripts or data that you put into this service are public.

BiocOncoTK documentation built on Nov. 8, 2020, 6:03 p.m.