inst/doc/c_HPAanalyze_case_query.R

## ----setup, include=FALSE-----------------------------------------------------
knitr::opts_chunk$set(
  collapse=TRUE,
  comment="#>",
  warning=FALSE,
  error=FALSE,
  eval=FALSE,
  crop = NULL
)

## ----library, message=FALSE, warning=FALSE, error=FALSE-----------------------
#  library(BiocStyle)
#  library(HPAanalyze)
#  library(dplyr)
#  library(tibble)
#  library(readr)
#  library(tidyr)

## ----echo=FALSE, eval=TRUE, fig.cap="The 'Fields >>' button.", out.width = '100%'----
knitr::include_graphics("figures/query_fields.png")

## ----echo=FALSE, eval=TRUE, fig.cap="Build your query with the drop-down menus.", out.width = '100%'----
knitr::include_graphics("figures/query_dropdown.png")

## ----echo=FALSE, eval=TRUE, fig.cap="Click the 'Search' button.", out.width = '100%'----
knitr::include_graphics("figures/query_search.png")

## ----echo=FALSE, eval=TRUE, fig.cap="Copy the link to the tsv file.", out.width = '100%'----
knitr::include_graphics("figures/query_tsvlink.png")

## -----------------------------------------------------------------------------
#  ## The link to your query tsv
#  my_hpa_query <- "https://www.proteinatlas.org/search/protein_class%3ACD+markers+AND+normal_expression%3ACerebral+cortex%3BAny%3BNot+detected%2CLow+AND+prognostic%3AGlioma%3BUnfavourable?format=tsv"
#  
#  ## Create a temporary file as destination for the download
#  temp <- tempfile("query", fileext=c(".tsv.gz"))
#  
#  ## Download to the temporary file
#  download.file(url=my_hpa_query, destfile = temp, method = "curl", mode = "wb")
#  
#  ## read the file into a data frame
#  query_df <- readr::read_tsv(temp)
#  
#  ## Unlink the temp file
#  unlink(temp)

## -----------------------------------------------------------------------------
#  tibble::glimpse(query_df)
#  
#  #> Observations: 6
#  #> Variables: 22
#  #> $ Gene                        <chr> "CD81", "NRP1", "PRNP", "SDC1", "THY...
#  #> $ `Gene synonym`              <chr> "TAPA-1, TAPA1, TSPAN28", "CD304, NR...
#  #> $ Ensembl                     <chr> "ENSG00000110651", "ENSG00000099250"...
#  #> $ `Gene description`          <chr> "CD81 molecule", "Neuropilin 1", "Pr...
#  #> $ Chromosome                  <dbl> 11, 10, 20, 2, 11, 17
#  #> $ Position                    <chr> "2376177-2397419", "33177492-3333626...
#  #> $ `Protein class`             <chr> "CD markers, Disease related genes, ...
#  #> $ Evidence                    <chr> "Evidence at protein level", "Eviden...
#  #> $ Antibody                    <chr> "CAB002507, HPA007234", "CAB004511, ...
#  #> $ `Reliability (IH)`          <chr> "Supported", "Approved", "Enhanced",...
#  #> $ `Reliability (Mouse Brain)` <lgl> NA, NA, NA, NA, NA, NA
#  #> $ `Reliability (IF)`          <chr> "Supported", "Uncertain", "Approved"...
#  #> $ `Subcellular location`      <chr> "Plasma membrane", "Mitochondria", "...
#  #> $ `Prognostic p-value`        <chr> "Glioma:5.12e-5 (unfavourable), Panc...
#  #> $ `RNA cancer category`       <chr> "Expressed in all", "Expressed in al...
#  #> $ `RNA tissue category`       <chr> "Expressed in all", "Expressed in al...
#  #> $ `RNA TS`                    <lgl> NA, NA, NA, NA, NA, NA
#  #> $ `RNA TS TPM`                <chr> NA, NA, NA, "esophagus: 250.7;skin: ...
#  #> $ `TPM max in non-specific`   <chr> "seminal vesicle: 2273.0", "placenta...
#  #> $ `RNA cell line category`    <chr> "Cell line enhanced", "Cell line enh...
#  #> $ `RNA CS`                    <lgl> NA, NA, NA, NA, NA, NA
#  #> $ `RNA CS TPM`                <chr> "ASC diff: 2031.3", "U-87 MG: 437.4"...

## -----------------------------------------------------------------------------
#  ## since the query give you the latest HPA version, get the latest datasets to match
#  latest_datasets <- hpaDownload()
#  
#  hpaVis(data = latest_datasets,
#         targetGene = query_df$Gene,
#         targetTissue = "cerebral cortex",
#         targetCancer = "glioma")

## -----------------------------------------------------------------------------
#  ## Download and import the xml files for proteins of interest
#  query_xml_list <- lapply(query_df$Ensembl, hpaXmlGet)
#  
#  ## Extract protein classes as a list of data frame
#  query_protein_classes <- lapply(query_xml_list, hpaXmlProtClass)
#  names(query_protein_classes) <- query_df$Gene # name list items
#  
#  ## Turn the list into a data frame
#  query_protein_classes_df <-
#      tidyr::unnest(tibble::enframe(query_protein_classes, name = "protein"))
#  
#  glimpse(query_protein_classes_df)
#  
#  #> Observations: 122
#  #> Variables: 5
#  #> $ protein   <chr> "CD81", "CD81", "CD81", "CD81", "CD81", "CD81", "CD81"...
#  #> $ id        <chr> "Cd", "Ja", "Jf", "Ma", "Md", "Me", "Mf", "Mg", "Mh", ...
#  #> $ name      <chr> "CD markers", "Transporters", "Accessory Factors Invol...
#  #> $ parent_id <chr> NA, NA, "Ja", NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, ...
#  #> $ source    <chr> "UniProt", "TCDB", "TCDB", "MDM", "MDM", "MEMSAT3", "M...
#  
#  
#  ## Which proteins in our list are also potential drug targets?
#  filter(query_protein_classes_df, name == "Potential drug targets")
#  
#  #> # A tibble: 2 x 5
#  #>   protein id    name                   parent_id source
#  #>   <chr>   <chr> <chr>                  <chr>     <chr>
#  #> 1 CD81    Pd    Potential drug targets <NA>      HPA
#  #> 2 PRNP    Pd    Potential drug targets <NA>      HPA

Try the HPAanalyze package in your browser

Any scripts or data that you put into this service are public.

HPAanalyze documentation built on Nov. 26, 2020, 2:01 a.m.