knitr::opts_chunk$set( echo = TRUE )

Introduction

This document describes, how the lipidomer::liverlipidome data set was prepared.

The data come from the project PR000633 in the Metabolomics Workbench.

Citation of the Data

Publication

Gorden, D. Lee, et al. Biomarkers of NAFLD Progression: a Lipidomics Approach to an Epidemic. J Lip Res. 56(3) 722-36 (2015). https://dx.doi.org/10.1194/jlr.P056002

Repository

This data is available at the NIH Common Fund's National Metabolomics Data Repository (NMDR) website, the Metabolomics Workbench, https://www.metabolomicsworkbench.org, where it has been assigned Project ID PR000633. The data can be accessed directly via it's Project DOI: 10.21228/M8V961 This work is supported by NIH grant, U2C-DK119886.

URL: http://dx.doi.org/10.21228/M8V961

Download the Data

Specify the File Paths

url.download <-
  c(
    "https://www.metabolomicsworkbench.org/data/study_textformat_view.php?STUDY_ID=ST000915&ANALYSIS_ID=AN001485&MODE=d",
    "https://www.metabolomicsworkbench.org/data/study_textformat_view.php?STUDY_ID=ST000915&ANALYSIS_ID=AN001486&MODE=d",
    "https://www.metabolomicsworkbench.org/data/study_textformat_view.php?STUDY_ID=ST000915&ANALYSIS_ID=AN001487&MODE=d",
    "https://www.metabolomicsworkbench.org/data/study_textformat_view.php?STUDY_ID=ST000915&ANALYSIS_ID=AN001488&MODE=d",
    "https://www.metabolomicsworkbench.org/data/study_textformat_view.php?STUDY_ID=ST000915&ANALYSIS_ID=AN001489&MODE=d",
    "https://www.metabolomicsworkbench.org/data/study_textformat_view.php?STUDY_ID=ST000915&ANALYSIS_ID=AN001490&MODE=d"

  )

Download the Data

sample.info.loaded <-
  read.table(
    file = url.download[ 1 ],
    check.names = FALSE,
    header = FALSE,
    nrows = 158-69-1,
    # sep = "\t",
    skip = 69,
    stringsAsFactors = FALSE
  )

data.loaded <- list()

data.loaded[[ 1 ]] <-
  read.table(
    file = url.download[ 1 ],
    check.names = FALSE,
    header = TRUE,
    nrows = 373-274-1,
    sep = "\t",
    skip = 274,
    stringsAsFactors = FALSE
  )

data.loaded[[ 2 ]] <-
  read.table(
    file = url.download[ 2 ],
    check.names = FALSE,
    header = TRUE,
    na.strings = "ND",
    nrows = 302-261-1,
    sep = "\t",
    skip = 261,
    stringsAsFactors = FALSE
  )

data.loaded[[ 3 ]] <-
  read.table(
    file = url.download[ 3 ],
    check.names = FALSE,
    header = TRUE,
    nrows = 288-276-1,
    sep = "\t",
    skip = 276,
    stringsAsFactors = FALSE
  )

data.loaded[[ 4 ]] <-
  read.table(
    file = url.download[ 4 ],
    check.names = FALSE,
    header = TRUE,
    nrows = 456-265-1,
    sep = "\t",
    skip = 265,
    stringsAsFactors = FALSE
  )

data.loaded[[ 5 ]] <-
  read.table(
    file = url.download[ 5 ],
    check.names = FALSE,
    header = TRUE,
    nrows = 368-269-1,
    sep = "\t",
    skip = 269,
    stringsAsFactors = FALSE
  )

data.loaded[[ 6 ]] <-
  read.table(
    file = url.download[ 6 ],
    check.names = FALSE,
    header = TRUE,
    nrows = 373-261-1,
    sep = "\t",
    skip = 261,
    stringsAsFactors = FALSE
  )

Prepare the Data

Reformat

Sample Information

````r

sample.info <- sample.info.loaded

for ( i in 4:ncol( sample.info ) ) {

tmp <- stringr::str_split_fixed( string = sample.info[ , i ], pattern = "(\:)|(\=)", n = 2 )

sample.info[ , i ] <- tmp[ , 2 ]

colnames( sample.info )[ i ] <- tmp[ 1, 1 ]

sample.info[ , i ] <- stringr::str_replace_all( string = sample.info[ , i ], pattern = "\;", replacement = "" )

}

sample.info[ sample.info == "-"] <- NA

colnames( sample.info )[ colnames( sample.info ) == "V3" ] <- "ID"

sample.info <- sample.info[ , 3:ncol( sample.info ) ]

sample.info$"Diagnosis" <- factor( x = sample.info$"Diagnosis", levels = c( "Normal", "Steatosis", "NASH", "Cirrhosis" ) )

names.numeric <- c( "BMI", "AGE", "AST", "ALT", "ALKP", "TBIL", "GLUCOSE" )

for ( i in 1:length( names.numeric ) ) {

sample.info[[ names.numeric[ i ] ]] <- as.numeric( sample.info[[ names.numeric[ i ] ]] )

}

### Lipids

```r

data <- data.loaded

data <-
  lapply(
    X = data,
    FUN =
      function( x ) {

        rownames( x ) <- x$"Samples"

        x <- t( x[ -1, -1 ] )

        y <-
          apply(
            X = x,
            MAR = 2,
            FUN = as.numeric
          )

        rownames( y ) <- rownames( x )

        return( y )

      }
  )

Merge Lipid Data

data.merged <- data[[ 1 ]]

for ( i in 2:length( data ) ) {

  data.merged <-
    merge(
      x = data.merged,
      y = data[[ i ]],
      by = "row.names"
    )

  rownames( data.merged ) <- data.merged$"Row.names"

  data.merged$"Row.names" <- NULL

}

data <- data.merged

Reformat Lipid Names

is.C.name <-
  stringr::str_detect(
    string = colnames( data ),
    pattern = "C[0-9]+" # "C[0-9]+:?[0-9]+?D?H?"
  )

if ( any( is.C.name ) ) {

  colnames( data )[ is.C.name ] <-
    stringr::str_replace(
      string = colnames( data )[ is.C.name ],
      pattern = "DH ",
      replacement = " DH-"
    )

  name.split <-
    stringr::str_split_fixed(
      string = colnames( data )[ is.C.name ],
      pattern = " ",
      n = 3
    )

  number <-
    stringr::str_replace_all(
      string = name.split[ , 1 ],
      pattern = "[A-Z]+",
      replacement = ""
    )

  number.split <-
    stringr::str_split_fixed(
      string = number,
      pattern = ":",
      n = 2
    )

  number.split[ which( number.split[ , 2 ] == "" ), 2 ] <- "0"

  number.split[ , 1 ] <- as.numeric( number.split[ , 1 ] ) + 18

  name.split[ , 2 ] <-
    stringr::str_replace(
      string = name.split[ , 2 ],
      pattern = "Sphingomyelin",
      replacement = "SM"
    )

  colnames( data )[ is.C.name ] <-
    paste0(
      name.split[ , 2 ],
      "(",
      number.split[ , 1 ],
      ":",
      number.split[ , 2 ],
      ")"
    )

}

is.numbered <-
  grepl(
    x = colnames( data ),
    pattern = "[A-Za-z]\\s?\\("
  )

data <- data[ , is.numbered ]

# Fixes to names

is.N3 <-
  grepl(
    x = colnames( data ),
    pattern = " N3" )

colnames( data )[ is.N3 ] <-
  stringr::str_replace(
    string = colnames( data )[ is.N3 ],
    pattern = " N3",
    replacement = ""
  )

colnames( data )[ is.N3 ] <-
  stringr::str_replace(
    string = colnames( data )[ is.N3 ],
    pattern = "\\(",
    replacement = "-N3("
  )

is.N6 <-
  grepl(
    x = colnames( data ),
    pattern = " N6" )

colnames( data )[ is.N6 ] <-
  stringr::str_replace(
    string = colnames( data )[ is.N6 ],
    pattern = " N6",
    replacement = ""
  )

colnames( data )[ is.N6 ] <-
  stringr::str_replace(
    string = colnames( data )[ is.N6 ],
    pattern = "\\(",
    replacement = "-N6("
  )

is.N9 <-
  grepl(
    x = colnames( data ),
    pattern = " N9" )

colnames( data )[ is.N9 ] <-
  stringr::str_replace(
    string = colnames( data )[ is.N9 ],
    pattern = " N9",
    replacement = ""
  )

colnames( data )[ is.N9 ] <-
  stringr::str_replace(
    string = colnames( data )[ is.N9 ],
    pattern = "\\(",
    replacement = "-N9("
  )

tmp <-
  grep(
    x = colnames( data ),
    pattern = "p\\)"
  )

colnames( data )[ tmp ] <-
  stringr::str_replace(
    string = colnames( data )[ tmp ],
    pattern = "\\(",
    replacement = "-P("
  )

colnames( data )[ tmp ] <-
  stringr::str_replace(
    string = colnames( data )[ tmp ],
    pattern = "p\\)",
    replacement = ")"
  )

colnames( data ) <-
  stringr::str_replace(
    string = colnames( data ),
    pattern = " \\(",
    replacement = "("
  )
colnames( data ) <-
  stringr::str_replace(
    string = colnames( data ),
    pattern = "\\.[0-9]",
    replacement = ""
  )

Round Observations

data <- signif( x = data, digits = 3 )

Merge Lipids and Sample Information

names.lipids <- colnames( data )

data <-
  merge(
    x = sample.info,
    y = data,
    by.x = "ID",
    by.y = "row.names"
  )

Omit Lipids with Less Than Two Observations Per Group

N.found <-
  by(
    data = !is.na( data[ , names.lipids ] ),
    INDICES = data$"Diagnosis",
    FUN = colSums
  )

N.found <- simplify2array( N.found )

is.found <- ( N.found > 1 )

is.found <-
  apply(
    X = is.found,
    MAR = 1,
    FUN = all
  ) 

names.missing <- names.lipids

names.missing <- names.missing[ !is.found ]

print( names.missing )

data <- data[ , !( colnames( data ) %in% names.missing ) ]

names.lipids <- names.lipids[ is.found ]

Convert the Data into a Longer Format

liverlipidome <- 
  tidyr::pivot_longer(
    data = data,
    cols = names.lipids,
    names_to = "Lipid_Name",
    values_to = "Lipid_Level"
  )

Save the Resulting Data

save(
  liverlipidome,
  file = "../data/liverlipidome.rda",
  compress = "xz"
)

SessionInfo

utils::sessionInfo()


tommi-s/lipidomeR documentation built on March 17, 2020, 12:14 a.m.