# Global knitr chunk options for every chunk rendered after this call.
knitr::opts_chunk$set(
  collapse = TRUE,     # merge each chunk's source and output into one block
  comment = "#>",      # prefix printed output lines with "#>"
  warning = FALSE,     # hide warnings in the rendered document
  message = TRUE,      # keep messages in the rendered document
  out.width = "100%"   # figures span the full width of the output
)
After the raw data processing, peak tables for the positive and negative modes have been generated.
Next, we need to get the peak tables and sample information and organize them as mass_dataset class objects.
# Attach the packages used throughout this walkthrough.
library(tidymass)
library(tidyverse)
Load the positive mode object.
# load() restores the saved variable `object` into the current environment;
# keep it under a mode-specific name and print it.
load("mzxml_ms1_data/POS/Result/object")
object_pos <- object
object_pos
Read sample information.
# Read the positive-mode sample sheet and preview its first rows.
sample_info_pos <- readr::read_csv(
  "sample_info/sample_info_pos.csv"
)
head(sample_info_pos)
Add sample_info_pos to object_pos.
# Preview the sample information currently stored in the object.
object_pos %>%
  extract_sample_info() %>%
  head()

# Drop the existing group/class/injection.order columns before joining.
# NOTE(review): presumably sample_info_pos supplies its own versions of these
# columns and the drop avoids duplicated names after the join -- confirm
# against the CSV.
object_pos <- object_pos %>%
  activate_mass_dataset(what = "sample_info") %>%
  dplyr::select(-c("group", "class", "injection.order"))

# Attach the external sample sheet, matching rows on sample_id.
# Uses `<-` (not `=`) for assignment, consistent with the rest of the file.
object_pos <- object_pos %>%
  activate_mass_dataset(what = "sample_info") %>%
  left_join(sample_info_pos, by = "sample_id")

# Confirm the merged sample information.
object_pos %>%
  extract_sample_info() %>%
  head()
Save object_pos in a new folder named data_cleaning.
# Create the output folder (including parents; no warning if it already
# exists), then save the positive-mode object into it.
dir.create(
  "data_cleaning/POS",
  showWarnings = FALSE,
  recursive = TRUE
)
save(
  object_pos,
  file = "data_cleaning/POS/object_pos"
)
# Print the object and its dimensions.
object_pos
dim(object_pos)

# Tabulate the samples by class, by group and by batch.
object_pos %>%
  activate_mass_dataset(what = "sample_info") %>%
  dplyr::count(class)

object_pos %>%
  activate_mass_dataset(what = "sample_info") %>%
  dplyr::count(group)

object_pos %>%
  activate_mass_dataset(what = "sample_info") %>%
  dplyr::count(batch)
So for positive mode, we have 259 samples and 10,149 variables. 220 subject samples and 39 QC samples. 110 control samples and 110 case samples. Two batches in total, 112 samples in batch 1 and 147 in batch 2.
Next, we can get the peak distribution plot of positive mode.
# Apply log10(x + 1) to the object, then draw the m/z vs. retention-time plot.
# `%>%` binds tighter than `+`, so the size scale is added to the finished plot.
object_pos %>%
  `+`(1) %>%
  log(10) %>%
  show_mz_rt_plot() +
  scale_size_continuous(range = c(0.01, 2))
We can explore the missing values (mvs) in positive mode data.
# Total number of missing values in the positive-mode object.
get_mv_number(object = object_pos)
785,821 mvs in total.
# Missing-value count per sample (first rows only).
get_mv_number(
  object = object_pos,
  by = "sample"
) %>%
  head()
Missing value number in each sample.
# Missing-value count per variable (first rows only).
get_mv_number(
  object = object_pos,
  by = "variable"
) %>%
  head()
Missing value number in each variable.
We can use the figure to show the missing value information.
# Overview figure of missing values in the positive-mode data, shown as
# percentages with column names hidden.
show_missing_values(
  object = object_pos,
  show_column_names = FALSE,
  percentage = TRUE
)
Show the mvs in samples.
# Per-sample missing values, shown as percentages.
show_sample_missing_values(
  object = object_pos,
  percentage = TRUE
)
Show the mvs in variables.
# Per-variable missing values as percentages, with x-axis text and ticks
# switched off; the size scale is then narrowed to the 0.01-1 range.
show_variable_missing_values(
  object = object_pos,
  percentage = TRUE,
  show_x_text = FALSE,
  show_x_ticks = FALSE
) +
  scale_size_continuous(range = c(0.01, 1))
Load the negative mode object.
# load() restores the saved variable `object` into the current environment;
# keep it under a mode-specific name and print it.
load("mzxml_ms1_data/NEG/Result/object")
object_neg <- object
object_neg
Read sample information.
# Read the negative-mode sample sheet and preview its first rows.
sample_info_neg <- readr::read_csv(
  "sample_info/sample_info_neg.csv"
)
head(sample_info_neg)
Add sample_info_neg to object_neg.
# Preview the sample information currently stored in the object.
object_neg %>%
  extract_sample_info() %>%
  head()

# Drop the existing group/class/injection.order columns before joining.
# NOTE(review): presumably sample_info_neg supplies its own versions of these
# columns and the drop avoids duplicated names after the join -- confirm
# against the CSV.
object_neg <- object_neg %>%
  activate_mass_dataset(what = "sample_info") %>%
  dplyr::select(-c("group", "class", "injection.order"))

# Attach the external sample sheet, matching rows on sample_id.
object_neg <- object_neg %>%
  activate_mass_dataset(what = "sample_info") %>%
  left_join(sample_info_neg, by = "sample_id")

# Confirm the merged sample information.
object_neg %>%
  extract_sample_info() %>%
  head()
Save object_neg in a new folder named data_cleaning.
# Create the output folder (including parents; no warning if it already
# exists), then save the negative-mode object into it.
dir.create(
  "data_cleaning/NEG",
  showWarnings = FALSE,
  recursive = TRUE
)
save(
  object_neg,
  file = "data_cleaning/NEG/object_neg"
)
# Print the object and its dimensions.
object_neg
dim(object_neg)

# Tabulate the samples by class, by group and by batch.
object_neg %>%
  activate_mass_dataset(what = "sample_info") %>%
  dplyr::count(class)

object_neg %>%
  activate_mass_dataset(what = "sample_info") %>%
  dplyr::count(group)

object_neg %>%
  activate_mass_dataset(what = "sample_info") %>%
  dplyr::count(batch)
So for negative mode, we have 259 samples and 8,804 variables. 220 subject samples and 39 QC samples. 110 control samples and 110 case samples. Two batches in total, 112 samples in batch 1 and 147 in batch 2.
Next, we can get the peak distribution plot of negative mode.
# Apply log10(x + 1) to the object, then draw the m/z vs. retention-time plot.
# `%>%` binds tighter than `+`, so the size scale is added to the finished plot.
object_neg %>%
  `+`(1) %>%
  log(10) %>%
  show_mz_rt_plot() +
  scale_size_continuous(range = c(0.01, 2))
We can explore the missing values (mvs) in negative mode data.
# Total number of missing values in the negative-mode object.
get_mv_number(object = object_neg)
748,253 mvs in total.
# Missing-value count per sample (first rows only).
get_mv_number(
  object = object_neg,
  by = "sample"
) %>%
  head()
Missing value number in each sample.
# Missing-value count per variable (first rows only).
get_mv_number(
  object = object_neg,
  by = "variable"
) %>%
  head()
Missing value number in each variable.
We can use the figure to show the missing value information.
# Overview figure of missing values in the negative-mode data, shown as
# percentages with column names hidden.
show_missing_values(
  object = object_neg,
  show_column_names = FALSE,
  percentage = TRUE
)
Show the mvs in samples.
# Per-sample missing values, shown as percentages.
show_sample_missing_values(
  object = object_neg,
  percentage = TRUE
)
Show the mvs in variables.
# Per-variable missing values as percentages, with x-axis text and ticks
# switched off; the size scale is then narrowed to the 0.01-1 range.
show_variable_missing_values(
  object = object_neg,
  percentage = TRUE,
  show_x_text = FALSE,
  show_x_ticks = FALSE
) +
  scale_size_continuous(range = c(0.01, 1))
From this exploration, we have a brief summary of our data. Next, we will use the masscleaner package to clean the data.
# Record the R version and loaded package versions for reproducibility.
sessionInfo()
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.