In tommi-s/lipidomeR: Integrative Visualizations of the Lipidome

# Set the default chunk option so that the source code of every chunk is
# echoed in the rendered document.
knitr::opts_chunk$set( echo = TRUE )

Introduction

This document describes how the lipidomeR::cancerlipidome data set was prepared.

The data come from the project PR000742 in the Metabolomics Workbench.

Citation of the Data

Publication

Purwaha, P., et al. Unbiased Lipidomic Profiling of Triple-Negative Breast Cancer Tissues Reveals the Association of Sphingomyelin Levels with Patient Disease-Free Survival. Metabolites 2018, 8, 41. https://doi.org/10.3390/metabo8030041

Repository

This data is available at the NIH Common Fund's National Metabolomics Data Repository (NMDR) website, the Metabolomics Workbench, https://www.metabolomicsworkbench.org, where it has been assigned Project ID PR000742. The data can be accessed directly via its Project DOI: 10.21228/M8RX01. This work is supported by NIH grant U2C-DK119886.

URL: http://dx.doi.org/10.21228/M8RX01

Download the Data

Download the data set in mzTab format from the project PR000742 in the Metabolomics Workbench.

Specify the File Paths

# Download links of the two analyses of study ST001111.
# Element 1 points to analysis AN001805 and element 2 to analysis AN001806.
url.download <- c(
  "https://www.metabolomicsworkbench.org/data/study_textformat_view.php?STUDY_ID=ST001111&ANALYSIS_ID=AN001805&MODE=d",
  "https://www.metabolomicsworkbench.org/data/study_textformat_view.php?STUDY_ID=ST001111&ANALYSIS_ID=AN001806&MODE=d"
)

Download the Data

# Sample information: a tab-separated table without a header line that is
# read from the first file after skipping its first 29 lines
# (148 - 29 - 1 = 118 rows are read).
sample.info.loaded <-
  utils::read.table(
    file = url.download[ 1 ],
    sep = "\t",
    header = FALSE,
    skip = 29,
    nrows = 148 - 29 - 1,
    check.names = FALSE,
    stringsAsFactors = FALSE
  )

# Lipid levels from the first file: a tab-separated table with a header line
# that starts after the first 171 lines (618 - 171 - 2 = 445 rows are read).
# The column names are kept exactly as they are written in the file.
data.loaded.pos <-
  utils::read.table(
    file = url.download[ 1 ],
    sep = "\t",
    header = TRUE,
    skip = 171,
    nrows = 618 - 171 - 2,
    check.names = FALSE,
    stringsAsFactors = FALSE
  )

# Lipid levels from the second file, laid out like the table above
# (505 - 171 - 2 = 332 rows are read).
data.loaded.neg <-
  utils::read.table(
    file = url.download[ 2 ],
    sep = "\t",
    header = TRUE,
    skip = 171,
    nrows = 505 - 171 - 2,
    check.names = FALSE,
    stringsAsFactors = FALSE
  )

Prepare the Data

Reformat the data sets
Merge lipidomics data from multiple measurements
Reformat the lipid names
Combine duplicate measurements from the same lipid into a single value
Omit classes with only one lipid
Merge lipidomics data with sample information
Convert the data into long format
Save the resulting data set

Reformat

Sample Information

# Work on a copy so that the table as loaded stays available.
sample.info <- sample.info.loaded

# Column V3 becomes the "ID" column, stored as text.
sample.info$"V3" <- as.character( sample.info$"V3" )

colnames( sample.info )[ colnames( sample.info ) == "V3" ] <- "ID"

# Column V4 becomes the "Group" column after the first "<letters>:" prefix
# has been removed from each value.
# The character class is "[A-Za-z]": the former "[A-za-z]" contained the range
# "A-z", which also matches the ASCII punctuation between "Z" and "a"
# ("[", "\", "]", "^", "_" and "`").
sample.info$"V4" <-
  stringr::str_replace(
    string = sample.info$"V4",
    pattern = "[A-Za-z]+\\:",
    replacement = ""
  )

colnames( sample.info )[ colnames( sample.info ) == "V4" ] <- "Group"

# Split column V5 at "; " into exactly three text fields per sample.
tmp <-
  stringr::str_split_fixed(
    string = sample.info$"V5",
    pattern = "; ",
    n = 3
  )

# Name the three fields by the "<letters>=" key found in the first row ...
colnames( tmp ) <-
  stringr::str_extract(
    string = tmp[ 1, ],
    pattern = "[A-Za-z]+\\="
  )

# ... without the trailing "=".
colnames( tmp ) <-
  stringr::str_replace(
    string = colnames( tmp ),
    pattern = "\\=",
    replacement = ""
  )

# Remove the "<letters>=" key from every value, column by column.
# Each column is passed to str_replace() as its first argument ("string").
# "MARGIN" is spelled out instead of relying on partial matching of "MAR".
tmp <-
  apply(
    X = tmp,
    MARGIN = 2,
    FUN = stringr::str_replace,
    pattern = "[A-Za-z]+\\=",
    replacement = ""
  )

tmp <- data.frame( tmp, check.names = FALSE )

# Append the parsed fields to the sample information.
sample.info <-
  dplyr::bind_cols(
    sample.info,
    tmp
  )

# Turn the text "NA" into a real missing value.
# "%in%" never returns NA, so the row index stays valid even if a value is
# already missing.
sample.info[ sample.info$"Stage" %in% "NA", "Stage" ] <- NA
sample.info[ grepl( x = sample.info$"Type", pattern = "NA" ), "Type" ] <- NA

# Store "Stage" as a factor without the now-unused level "NA".
# factor() accepts both a character vector and a factor, whereas droplevels()
# has no method for character vectors and therefore fails when data.frame()
# did not create factors (the default since R 4.0.0).
sample.info$"Stage" <- factor( sample.info$"Stage" )

# Drop the columns that still carry their default names (V1, V2, ...).
sample.info <-
  sample.info[
    ,
    !grepl( x = colnames( sample.info ), pattern = "^V[0-9]" ),
    drop = FALSE
  ]

# Replace every "." by "-" in the values of "Race" and "Type".
sample.info$"Race" <-
  stringr::str_replace_all(
    string = sample.info$"Race",
    pattern = "\\.",
    replacement = "-"
  )

sample.info$"Type" <-
  stringr::str_replace_all(
    string = sample.info$"Type",
    pattern = "\\.",
    replacement = "-"
  )

Lipids from the Positive Ion Mode

# Start from the loaded table and label its rows by the "Samples" column.
data.pos <- data.loaded.pos
row.names( data.pos ) <- data.pos$"Samples"

# Drop the first row and the first column, then transpose so that the former
# columns become the rows and the former row labels become the column names.
data.pos <- t( as.matrix( data.pos[ -1, -1 ] ) )

# Store the values as numbers while keeping the dimensions and names.
mode( data.pos ) <- "numeric"

Lipids from the Negative Ion Mode

# Start from the loaded table and label its rows by the "Samples" column.
data.neg <- data.loaded.neg
row.names( data.neg ) <- data.neg$"Samples"

# Drop the first row and the first column, then transpose so that the former
# columns become the rows and the former row labels become the column names.
data.neg <- t( as.matrix( data.neg[ -1, -1 ] ) )

# Store the values as numbers while keeping the dimensions and names.
mode( data.neg ) <- "numeric"

Merge Lipid Data

# Join the two matrices on their row names. Only rows present in both are
# kept, and merge() returns the shared row names in a column "Row.names".
data <- merge( x = data.pos, y = data.neg, by = "row.names" )

# Move that column back into the row names of the merged table.
rownames( data ) <- data[[ "Row.names" ]]
data[[ "Row.names" ]] <- NULL

Reformat Lipid Names

# Rewrite the column names (lipid names) of `data` step by step.
# The steps build on each other, so their order matters.
# Note that str_replace() only replaces the first match within each name.

# Step 1: flag the names that contain "N-".
tmp <-
  stringr::str_detect(
    string = colnames( data ),
    pattern = "N\\-"
  )

# Step 2: in the flagged names, replace "N-(" by "N-" ...
colnames( data )[ tmp ] <-
  stringr::str_replace(
    string = colnames( data )[ tmp ],
    pattern = "N\\-\\(",
    replacement = "N-"
  )

# ... and "noyl)" by "noyl".
colnames( data )[ tmp ] <-
  stringr::str_replace(
    string = colnames( data )[ tmp ],
    pattern = "noyl\\)",
    replacement = "noyl"
  )

# Step 3: in all names, cut everything from the first "(" that is directly
# followed by a digit up to the end of the name.
colnames( data ) <-
  stringr::str_replace(
    string = colnames( data ),
    pattern = "\\([0-9].+$",
    replacement = ""
  )

# Step 4: append ")" to the names that now end in a digit.
tmp <-
  stringr::str_detect(
    string = colnames( data ),
    pattern = "[0-9]$"
  )

colnames( data )[ tmp ] <-
  paste0(
    colnames( data )[ tmp ],
    ")"
  )

# Step 5: in names that contain a space followed by a digit, replace the
# first space of the name by "(".
tmp <-
  stringr::str_detect(
    string = colnames( data ),
    pattern = " [0-9]"
  )

colnames( data )[ tmp ] <-
  stringr::str_replace(
    string = colnames( data )[ tmp ],
    pattern = " ",
    replacement = "("
  )

# Step 6: in names that contain a digit followed by a space, replace the
# first remaining space of the name by ")".
tmp <-
  stringr::str_detect(
    string = colnames( data ),
    pattern = "[0-9] "
  )

colnames( data )[ tmp ] <-
  stringr::str_replace(
    string = colnames( data )[ tmp ],
    pattern = " ",
    replacement = ")"
  )

# Step 7: in all names, remove the text that runs from a "[" to a "]+"
# (the match is greedy).
colnames( data ) <-
  stringr::str_replace(
    string = colnames( data ),
    pattern = "\\[.+\\]\\+",
    replacement = ""
  )

Combine Duplicate Lipids

# Names that occur in more than one column of `data`.
names.duplicates <- names( which( table( colnames( data ) ) > 1 ) )

# Without any duplicated name there is nothing to combine.
if ( length( names.duplicates ) > 0 ) {

  # For every duplicated name, add up its columns row by row.
  # The values are summed (the former `prod` multiplied them, contradicting
  # both the name of the result and the stated aim of combining duplicate
  # measurements). Missing values are ignored in the sum.
  sum.duplicates <-
    lapply(
      X = names.duplicates,
      FUN =
        function( x ) {

          idx.i <- which( colnames( data ) == x )

          apply(
            X = data[ , idx.i, drop = FALSE ],
            MARGIN = 1,
            FUN = sum,
            na.rm = TRUE
          )

        }
    )

  # One column per duplicated name; the rows keep the row names of `data`.
  sum.duplicates <- do.call( what = cbind, args = sum.duplicates )
  colnames( sum.duplicates ) <- names.duplicates

  sum.duplicates <-
    data.frame(
      sum.duplicates,
      check.names = FALSE
    )

  # Replace the duplicated columns by their combined column.
  data <-
    merge(
      x = data[ , !( colnames( data ) %in% names.duplicates ), drop = FALSE ],
      y = sum.duplicates,
      by = "row.names"
    )

  # merge() returns the row names in a column "Row.names"; move them back.
  rownames( data ) <- data$"Row.names"

  data$"Row.names" <- NULL

}

# Sort the columns by name in increasing order.
data <- data[ , order( colnames( data ), decreasing = FALSE ) ]

Omit Classes with Only One Lipid

# Keep only the columns whose name does not contain "SB N".
data <- data[ , grep( pattern = "SB N", x = colnames( data ), invert = TRUE ) ]

Round Observations

# Round every value of the table to three significant digits.
data <- signif( x = data, digits = 3 )

Merge Lipids and Sample Information

# Remember the lipid columns before the sample information is added.
names.lipids <- names( data )

# Join the sample information with the lipid levels: column "ID" of the sample
# information is matched against the row names of the lipid table.
data <- merge( x = sample.info, y = data, by.x = "ID", by.y = "row.names" )

# Store the group as a factor.
data[[ "Group" ]] <- as.factor( x = data[[ "Group" ]] )

Convert the Data into a Longer Format

# Reshape to one row per sample and lipid: the lipid columns are stacked into
# the name column "Lipid_Name" and the value column "Lipid_Level".
# all_of() marks `names.lipids` as an external character vector of column
# names. Passing the bare vector is ambiguous (a column of that name would
# take precedence) and is deprecated by tidyselect.
cancerlipidome <-
  tidyr::pivot_longer(
    data = data,
    cols = tidyr::all_of( names.lipids ),
    names_to = "Lipid_Name",
    values_to = "Lipid_Level"
  )

Save the Resulting Data

# Write the object `cancerlipidome` to the data directory, compressed with xz.
save( list = "cancerlipidome", file = "../data/cancerlipidome.rda", compress = "xz" )

SessionInfo

# Report the R version and the packages in use for this session.
utils::sessionInfo()

tommi-s/lipidomeR documentation built on March 17, 2020, 12:14 a.m.

rdrr.io home R language documentation Run R code online

CRAN packages Bioconductor packages R-Forge packages GitHub packages

Note that we can't provide technical support on individual packages. You should contact the package authors for that.

tommi-s/lipidomeR
Integrative Visualizations of the Lipidome

In tommi-s/lipidomeR: Integrative Visualizations of the Lipidome

Introduction

Citation of the Data

Publication

Repository

Download the Data

Specify the File Paths

Download the Data

Prepare the Data

Reformat

Sample Information

Lipids from the Positive Ion Mode

Lipids from the Negative Ion Mode

Merge Lipid Data

Reformat Lipid Names

Combine Duplicate Lipids

Omit Classes with Only One Lipid

Round Observations

Merge Lipids and Sample Information

Convert the Data into a Longer Format

Save the Resulting Data

SessionInfo

R Package Documentation

Browse R Packages

We want your feedback!

tommi-s/lipidomeR Integrative Visualizations of the Lipidome

In tommi-s/lipidomeR: Integrative Visualizations of the Lipidome

Introduction

Citation of the Data

Publication

Repository

Download the Data

Specify the File Paths

Download the Data

Prepare the Data

Reformat

Sample Information

Lipids from the Positive Ion Mode

Lipids from the Negative Ion Mode

Merge Lipid Data

Reformat Lipid Names

Combine Duplicate Lipids

Omit Classes with Only One Lipid

Round Observations

Merge Lipids and Sample Information

Convert the Data into a Longer Format

Save the Resulting Data

SessionInfo

R Package Documentation

Browse R Packages

We want your feedback!

tommi-s/lipidomeR
Integrative Visualizations of the Lipidome