filter_samples | R Documentation |
Filter samples/variables based on the conditions
filter_samples(object, flist, prune = TRUE, apply_to = "all")
filter_variables(
object,
flist,
prune = TRUE,
apply_to = "all",
according_to_samples = "all"
)
object |
(required) mass_dataset class object. |
flist |
(required) A function or list of functions that take a vector of abundance values and return a logical. |
prune |
(optional) A logical. Default is TRUE. If TRUE, the pruned object is returned; if FALSE, the logical vector is returned. |
apply_to |
(optional) Which samples (for filter_samples) or variables (for filter_variables) the function is applied to. Default is "all". To apply it only to specific samples/variables, provide their names as a vector; all other samples/variables are set to TRUE. |
according_to_samples |
(optional) Which samples are used when filtering variables. Default is "all". To use only some of the samples, provide their names as a vector. |
A logical vector whose length equals the number of samples/variables in the mass_dataset-class object.
Alternatively, if prune == TRUE, the pruned mass_dataset-class object is returned instead.
Xiaotao Shen shenxt1990@outlook.com
# Load the example tables that are combined into the object below.
data("expression_data")
data("sample_info")
data("variable_info")

# Build the object used by the filter_samples() examples. There is no trailing
# comma after the last argument: a trailing comma passes an extra, empty
# argument to create_mass_dataset().
object <-
  create_mass_dataset(
    expression_data = expression_data,
    sample_info = sample_info,
    variable_info = variable_info
  )
# Keep the samples in which fewer than 40% of the values are missing.
# `prune` is left at its default (TRUE), so the pruned object is returned.
filter_samples(
  object = object,
  flist = function(x) {
    sum(is.na(x)) / length(x) < 0.4
  }
)

# Same condition with prune = FALSE: the logical vector is returned instead
# of the pruned object.
filter_samples(
  object = object,
  flist = function(x) {
    sum(is.na(x)) / length(x) < 0.4
  },
  prune = FALSE
)
# Apply the condition (fewer than 20% missing values) to the "Subject"
# samples only; they are selected by the `class` column of the sample info.
subject_ids <-
  get_sample_id(object)[extract_sample_info(object)$class == "Subject"]

object2 <-
  filter_samples(
    object = object,
    flist = function(x) {
      sum(is.na(x)) / length(x) < 0.2
    },
    prune = TRUE,
    apply_to = subject_ids
  )

object2
# Attached once for the %>% pipe used below.
library(tidyverse)

# Load the example tables that are combined into the object below.
data("expression_data")
data("sample_info")
data("variable_info")

# Build the object used by the filter_variables() examples. There is no
# trailing comma after the last argument: a trailing comma passes an extra,
# empty argument to create_mass_dataset().
object <-
  create_mass_dataset(
    expression_data = expression_data,
    sample_info = sample_info,
    variable_info = variable_info
  )

object

# Keep variables with fewer than 50% missing values (MVs) across all samples,
# i.e. variables with 50% or more MVs are filtered out.

# prune = FALSE returns the logical vector; show its first entries.
filter_variables(
  object,
  flist = function(x) {
    sum(is.na(x)) / length(x) < 0.5
  },
  prune = FALSE
) %>%
  head()

# prune = TRUE returns the pruned object.
filter_variables(
  object,
  flist = function(x) {
    sum(is.na(x)) / length(x) < 0.5
  },
  prune = TRUE
)
# Keep variables with fewer than 50% missing values (MVs), judged on the
# "QC" samples only.
qc_ids <-
  get_sample_id(object)[extract_sample_info(object)$class == "QC"]

filter_variables(
  object,
  flist = function(x) {
    sum(is.na(x)) / length(x) < 0.5
  },
  prune = TRUE,
  according_to_samples = qc_ids
)
# Keep variables that have fewer than 50% missing values (MVs) in the QC
# samples or in the Subject samples.

# Shared condition: TRUE when fewer than half of the values are missing.
mv_below_half <- function(x) {
  sum(is.na(x)) / length(x) < 0.5
}

sample_class <- extract_sample_info(object)$class

# Logical vector for the QC samples (prune = FALSE returns the vector).
idx1 <-
  filter_variables(
    object,
    flist = mv_below_half,
    prune = FALSE,
    according_to_samples = get_sample_id(object)[sample_class == "QC"]
  )

# Logical vector for the Subject samples.
idx2 <-
  filter_variables(
    object,
    flist = mv_below_half,
    prune = FALSE,
    according_to_samples = get_sample_id(object)[sample_class == "Subject"]
  )

# Positions of the variables that pass in at least one of the two groups.
idx <- which(idx1 | idx2)

object2 <- object[idx, ]
object2
# Keep variables whose relative standard deviation (RSD, in percent) across
# the QC samples is below 30.
object3 <-
  filter_variables(
    object = object,
    flist = function(x) {
      rsd <- sd(x) * 100 / mean(x)
      # An undefined RSD (NA/NaN, e.g. when x contains missing values) fails
      # the threshold. Plain scalar logic is used here instead of ifelse(),
      # which is intended for vectors.
      !is.na(rsd) && rsd < 30
    },
    apply_to = "all",
    prune = TRUE,
    according_to_samples =
      get_sample_id(object)[extract_sample_info(object)$class == "QC"]
  )

object3
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.