NACHO Analysis

knitr::opts_chunk$set(
  eval = TRUE,
  collapse = TRUE,
  # results = "asis",
  include = TRUE,
  echo = TRUE,
  warning = TRUE,
  message = TRUE,
  error = TRUE,
  # tidy = FALSE,
  # crop = TRUE,
  # autodep = TRUE,
  fig.align = "center",
  cache = FALSE
)
knitr::include_graphics(path = "nacho_hex.png")

Installation

# Install NACHO from CRAN:
install.packages("NACHO")

# Or the the development version from GitHub:
# install.packages("remotes")
remotes::install_github("mcanouil/NACHO")

Overview

cat(readLines(system.file("app", "www", "about-nacho.md", package = "NACHO"))[-c(1, 2)], sep = "\n")
print(citation("NACHO"), "html")
print(citation("NACHO"), "bibtex")

Analyse NanoString data

Load packages

library(NACHO)
library(GEOquery, quietly = TRUE, warn.conflicts = FALSE)

Download GSE70970 from GEO (or use your own data)

data_directory <- file.path(tempdir(), "GSE70970", "Data")

# Download data
gse <- getGEO("GSE70970")
getGEOSuppFiles(GEO = "GSE70970", baseDir = tempdir())
# Unzip data
untar(
  tarfile = file.path(tempdir(), "GSE70970", "GSE70970_RAW.tar"),
  exdir = data_directory
)
# Get phenotypes and add IDs
targets <- pData(phenoData(gse[[1]]))
targets$IDFILE <- list.files(data_directory)

Import RCC files

GSE70970 <- load_rcc(data_directory, targets, id_colname = "IDFILE")

Perform the analyses using limma

library(limma)

Get the phenotypes

selected_pheno <- GSE70970[["nacho"]][
  j = lapply(unique(.SD), function(x) ifelse(x == "NA", NA, x)),
  .SDcols = c("IDFILE", "age:ch1", "gender:ch1", "chemo:ch1", "disease.event:ch1")
]
selected_pheno <- na.exclude(selected_pheno)
head(selected_pheno)

Get the normalised counts

expr_counts <- GSE70970[["nacho"]][
  i = grepl("Endogenous", CodeClass),
  j = as.matrix(
    dcast(.SD, Name ~ IDFILE, value.var = "Count_Norm"),
    "Name"
  ),
  .SDcols = c("IDFILE", "Name", "Count_Norm")
]
expr_counts[1:5, 1:5]

Alternatively, "Accession" number is also available.

GSE70970[["nacho"]][
  i = grepl("Endogenous", CodeClass),
  j = as.matrix(
    dcast(.SD, Accession ~ IDFILE, value.var = "Count_Norm"),
    "Accession"
  ),
  .SDcols = c("IDFILE", "Accession", "Count_Norm")
]

Select phenotypes and counts

  1. Make sure count matrix and phenotypes have the same samples
samples_kept <- intersect(selected_pheno[["IDFILE"]], colnames(expr_counts))
expr_counts <- expr_counts[, samples_kept]
selected_pheno <- selected_pheno[IDFILE %in% c(samples_kept)]
  1. Build the numeric design matrix
design <- model.matrix(~ `disease.event:ch1`, selected_pheno)
  1. limma
eBayes(lmFit(expr_counts, design))

Perform the analyses using lm (or any other model)

GSE70970[["nacho"]][
  i = grepl("Endogenous", CodeClass),
  j = lapply(unique(.SD), function(x) ifelse(x == "NA", NA, x)),
  .SDcols = c(
    "IDFILE", "Name", "Accession", "Count", "Count_Norm",
    "age:ch1", "gender:ch1", "chemo:ch1", "disease.event:ch1"
  )
][
  Name %in% head(unique(Name), 10)
][
  j = as.data.table(
    coef(summary(lm(
      formula = Count_Norm ~ `disease.event:ch1`,
      data = na.exclude(.SD)
    ))),
    "term"
  ),
  by = c("Name", "Accession")
]


Try the NACHO package in your browser

Any scripts or data that you put into this service are public.

NACHO documentation built on Aug. 8, 2023, 1:08 a.m.