# Set the knitr chunk option `echo` to TRUE for the chunks of this document.
knitr::opts_chunk$set( echo = TRUE )
This document describes how the lipidomer::cancerlipidome data set was prepared.
The data come from the project PR000742 in the Metabolomics Workbench.
Purwaha, P., et al. Unbiased Lipidomic Profiling of Triple-Negative Breast Cancer Tissues Reveals the Association of Sphingomyelin Levels with Patient Disease-Free Survival. Metabolites 2018, 8, 41. https://doi.org/10.3390/metabo8030041
This data is available at the NIH Common Fund's National Metabolomics Data Repository (NMDR) website, the Metabolomics Workbench, https://www.metabolomicsworkbench.org, where it has been assigned Project ID PR000742. The data can be accessed directly via its Project DOI: 10.21228/M8RX01. This work is supported by NIH grant U2C-DK119886.
URL: http://dx.doi.org/10.21228/M8RX01
# Download addresses of the two analyses of study ST001111 in the
# Metabolomics Workbench text format. Both addresses share everything except
# the analysis identifier, so they are assembled from the common parts.
url.download <-
  paste0(
    "https://www.metabolomicsworkbench.org/data/study_textformat_view.php",
    "?STUDY_ID=ST001111",
    "&ANALYSIS_ID=",
    c( "AN001805", "AN001806" ),
    "&MODE=d"
  )
# Read one tab-separated section of a downloaded text file.
#
# url:    address of the file to read.
# skip:   number of leading lines to skip before the section starts.
# nrows:  number of rows to read from the section.
# header: whether the first line read holds the column names.
#
# Column names are kept as they appear in the file and character columns are
# not converted to factors.
read.section <- function( url, skip, nrows, header ) {
  read.table(
    file = url,
    header = header,
    sep = "\t",
    skip = skip,
    nrows = nrows,
    check.names = FALSE,
    stringsAsFactors = FALSE
  )
}

# Sample annotation section of the first file (no header line).
sample.info.loaded <-
  read.section(
    url = url.download[ 1 ],
    skip = 29,
    nrows = 148-29-1,
    header = FALSE
  )

# Measurement section of the first file; presumably the positive ionization
# mode, judging by the object name -- TODO confirm against the repository.
data.loaded.pos <-
  read.section(
    url = url.download[ 1 ],
    skip = 171,
    nrows = 618-171-2,
    header = TRUE
  )

# Measurement section of the second file; presumably the negative ionization
# mode, judging by the object name -- TODO confirm against the repository.
data.loaded.neg <-
  read.section(
    url = url.download[ 2 ],
    skip = 171,
    nrows = 505-171-2,
    header = TRUE
  )
# Tidy the sample annotation table: name the identifier and group columns,
# unpack the "key=value" annotations of column V5 into separate columns, and
# mark textual "NA" entries as missing.
sample.info <- sample.info.loaded

# Column V3 becomes the sample identifier, stored as character.
sample.info$"V3" <- as.character( sample.info$"V3" )
colnames( sample.info )[ colnames( sample.info ) == "V3" ] <- "ID"

# Column V4 becomes the group: strip the leading "<letters>:" label.
# The character class is "[A-Za-z]"; the earlier "[A-za-z]" was a typo range
# that also matched the punctuation characters between "Z" and "a".
sample.info$"V4" <-
  stringr::str_replace(
    string = sample.info$"V4",
    pattern = "[A-Za-z]+\\:",
    replacement = ""
  )
colnames( sample.info )[ colnames( sample.info ) == "V4" ] <- "Group"

# Column V5 is split at "; " into three "key=value" fields.
tmp <-
  stringr::str_split_fixed(
    string = sample.info$"V5",
    pattern = "; ",
    n = 3
  )

# The keys found in the first row become the column names (without "=").
colnames( tmp ) <-
  stringr::str_extract(
    string = tmp[ 1, ],
    pattern = "[A-Za-z]+\\="
  )
colnames( tmp ) <-
  stringr::str_replace(
    string = colnames( tmp ),
    pattern = "\\=",
    replacement = ""
  )

# Remove the "key=" prefix from every value, column by column.
tmp <-
  apply(
    X = tmp,
    MARGIN = 2,
    FUN = stringr::str_replace,
    pattern = "[A-Za-z]+\\=",
    replacement = ""
  )

# Keep the annotations as character so that the result does not depend on the
# R version's default for stringsAsFactors.
tmp <- data.frame( tmp, check.names = FALSE, stringsAsFactors = FALSE )

sample.info <- dplyr::bind_cols( sample.info, tmp )

# Textual "NA" entries become real missing values. %in% never returns NA, so
# the row index stays valid even if a value is already missing.
sample.info[ sample.info$"Stage" %in% "NA", "Stage" ] <- NA
sample.info[ grepl( x = sample.info$"Type", pattern = "NA" ), "Type" ] <- NA

# Stage is stored as a factor whose levels are the observed stages only
# (missing values do not form a level).
sample.info$"Stage" <- factor( sample.info$"Stage" )

# Drop the remaining unnamed raw columns (V1, V2, ...).
sample.info <-
  sample.info[ , !grepl( x = colnames( sample.info ), pattern = "^V[0-9]" ) ]

# Replace dots by hyphens in the race and type annotations.
sample.info$"Race" <-
  stringr::str_replace_all(
    string = sample.info$"Race",
    pattern = "\\.",
    replacement = "-"
  )
sample.info$"Type" <-
  stringr::str_replace_all(
    string = sample.info$"Type",
    pattern = "\\.",
    replacement = "-"
  )
# Convert the table loaded into data.loaded.pos into a numeric matrix.
# The "Samples" column supplies the row names; the first row and the first
# column are then dropped (presumably a non-numeric annotation row and the
# "Samples" column itself -- TODO confirm) and the table is transposed.
data.pos <- local( {
  loaded <- data.loaded.pos
  rownames( loaded ) <- loaded[[ "Samples" ]]
  values <- t( as.matrix( loaded[ -1, -1 ] ) )
  mode( values ) <- "numeric"
  values
} )
# Convert the table loaded into data.loaded.neg into a numeric matrix.
# The "Samples" column supplies the row names; the first row and the first
# column are then dropped (presumably a non-numeric annotation row and the
# "Samples" column itself -- TODO confirm) and the table is transposed.
data.neg <- local( {
  loaded <- data.loaded.neg
  rownames( loaded ) <- loaded[[ "Samples" ]]
  values <- t( as.matrix( loaded[ -1, -1 ] ) )
  mode( values ) <- "numeric"
  values
} )
# Join the two measurement matrices on their row names. Only rows present in
# both are kept (merge() default). The key column "Row.names" created by
# merge() is moved back into the row names of the result.
data <- local( {
  merged <- merge( x = data.pos, y = data.neg, by = "row.names" )
  keys <- merged[[ "Row.names" ]]
  merged[[ "Row.names" ]] <- NULL
  rownames( merged ) <- keys
  merged
} )
# Clean the column (lipid) names of `data`. All steps work on a local copy of
# the names, which is written back once at the end.
lipid.names <- colnames( data )

# Names containing "N-": remove the parentheses around the N-acyl part,
# i.e. "N-(" becomes "N-" and "noyl)" becomes "noyl".
has.n.prefix <- stringr::str_detect( string = lipid.names, pattern = "N\\-" )
lipid.names[ has.n.prefix ] <-
  stringr::str_replace(
    string =
      stringr::str_replace(
        string = lipid.names[ has.n.prefix ],
        pattern = "N\\-\\(",
        replacement = "N-"
      ),
    pattern = "noyl\\)",
    replacement = "noyl"
  )

# Cut everything from the first "(" that is followed by a digit.
lipid.names <-
  stringr::str_replace(
    string = lipid.names,
    pattern = "\\([0-9].+$",
    replacement = ""
  )

# Names ending in a digit get a closing parenthesis appended.
ends.with.digit <-
  stringr::str_detect( string = lipid.names, pattern = "[0-9]$" )
lipid.names[ ends.with.digit ] <-
  paste0( lipid.names[ ends.with.digit ], ")" )

# Where a space precedes a digit, the first space becomes "(".
space.before.digit <-
  stringr::str_detect( string = lipid.names, pattern = " [0-9]" )
lipid.names[ space.before.digit ] <-
  stringr::str_replace(
    string = lipid.names[ space.before.digit ],
    pattern = " ",
    replacement = "("
  )

# Where a space follows a digit, the first space becomes ")".
space.after.digit <-
  stringr::str_detect( string = lipid.names, pattern = "[0-9] " )
lipid.names[ space.after.digit ] <-
  stringr::str_replace(
    string = lipid.names[ space.after.digit ],
    pattern = " ",
    replacement = ")"
  )

# Remove a bracketed suffix followed by "+", e.g. "[...]+".
lipid.names <-
  stringr::str_replace(
    string = lipid.names,
    pattern = "\\[.+\\]\\+",
    replacement = ""
  )

colnames( data ) <- lipid.names
# Combine columns that share the same (cleaned) lipid name into one column
# holding their row-wise sum, then order all columns alphabetically.
#
# The levels of duplicated columns are added up; missing values are ignored
# (a row in which all duplicates are missing sums to 0). The earlier version
# multiplied the duplicates, which contradicted the name `sum.duplicates`.
names.duplicates <- names( which( table( colnames( data ) ) > 1 ) )

# Without duplicated names there is nothing to combine.
if ( length( names.duplicates ) > 0 ) {

  sum.duplicates <-
    lapply(
      X = names.duplicates,
      FUN = function( x ) {
        idx.i <- which( colnames( data ) == x )
        # drop = FALSE keeps a data frame so that rowSums() always applies.
        rowSums( x = data[ , idx.i, drop = FALSE ], na.rm = TRUE )
      }
    )

  # One column per duplicated name; row names are carried over from `data`.
  sum.duplicates <- do.call( what = cbind, args = sum.duplicates )
  colnames( sum.duplicates ) <- names.duplicates
  sum.duplicates <- data.frame( sum.duplicates, check.names = FALSE )

  # Replace the duplicated columns by their sums, matching rows by row name.
  data <-
    merge(
      x = data[ , !( colnames( data ) %in% names.duplicates ), drop = FALSE ],
      y = sum.duplicates,
      by = "row.names"
    )
  rownames( data ) <- data$"Row.names"
  data$"Row.names" <- NULL

}

data <- data[ , order( colnames( data ), decreasing = FALSE ) ]
# Keep only the columns whose name does not contain "SB N".
data <-
  data[ , grep( pattern = "SB N", x = colnames( data ), invert = TRUE ) ]
# Round every column of `data` to three significant digits, keeping the
# data frame structure (row and column names) intact.
data[] <- lapply( X = data, FUN = signif, digits = 3 )
# Remember the lipid columns before the sample annotations are attached.
names.lipids <- colnames( data )

# Attach the sample annotations: `ID` of sample.info is matched against the
# row names of the measurement table.
data <-
  merge(
    x = sample.info,
    y = data,
    by.x = "ID",
    by.y = "row.names"
  )

# Store the group as a factor.
data[[ "Group" ]] <- as.factor( x = data[[ "Group" ]] )
# Reshape to long format: one row per sample and lipid, with the lipid name
# in `Lipid_Name` and its level in `Lipid_Level`.
# `names.lipids` is a character vector defined outside the data frame, so it
# is wrapped in all_of(): this selects exactly those columns, fails loudly if
# one is absent, and cannot be confused with a column called "names.lipids".
cancerlipidome <-
  tidyr::pivot_longer(
    data = data,
    cols = tidyr::all_of( names.lipids ),
    names_to = "Lipid_Name",
    values_to = "Lipid_Level"
  )
# Write the prepared data set to the package's data directory as an
# xz-compressed .rda file.
save(
  list = "cancerlipidome",
  file = "../data/cancerlipidome.rda",
  compress = "xz"
)
# Print the R version and the attached/loaded packages used for this run.
utils::sessionInfo()
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.