# Chunk defaults used when knitting this README
knitr::opts_chunk$set(
  collapse = TRUE,
  comment = "#>",
  fig.path = "man/figures/README-",
  out.width = "100%",
  dpi = 300,
  fig.width = 7
)
# Render NA cells of knitr::kable() tables as empty strings
options(knitr.kable.NA = "")

# Colour constants
hcolor <- "#363B74"
tcolor <- "#EEEEEE"

# Make the pipe available without attaching dplyr
`%>%` <- dplyr::`%>%`

# Root URL of the package website
base_url <- "https://villegar.github.io/MetaPipe/"

#' Build a link to a page of the package website
#'
#' @param page Page name (appended to the URL as is).
#' @param is.vignette If `TRUE`, the page is looked up under `articles/`;
#'   otherwise directly under `base_url`.
#' @return Character vector with the full URL(s).
web <- function(page, is.vignette = TRUE) {
  section <- ifelse(is.vignette, "articles/", "")
  paste0(base_url, section, page)
}
```{css styles, echo = FALSE, eval = FALSE}
/* Remove the bottom margin of paragraphs; !important overrides other rules */
p {
  margin-bottom: 0px !important;
}
<!-- Utility functions -->
```r
#' Append an HTML subscript to a bracketed label
#'
#' @param str1 Text placed between square brackets.
#' @param str2 Text placed inside the `<sub>` tag.
#' @return Character vector of the form `[str1]<sub>str2</sub>`.
sub_html <- function(str1, str2) {
  paste0("[", str1, "]<sub>", str2, "</sub>")
}
r badger::badge_cran_release("MetaPipe", color = "black")
r badger::badge_devel("villegar/MetaPipe", "yellow")
r badger::badge_codecov("villegar/MetaPipe")
r badger::badge_github_actions()
MetaPipe: A High-Performance Computing Pipeline for QTL Mapping of Large Ionomic and Metabolomic Datasets
The goal of MetaPipe is to provide an easy-to-use and powerful tool capable of performing QTL mapping analyses.
You can install the released version of MetaPipe from CRAN with:
# Install the released version of MetaPipe
install.packages("MetaPipe")
And the development version from GitHub with:
# Uncomment to install the helper packages first
# (`remotes` provides install_github(), used below)
# install.packages(c("hexSticker", "kableExtra", "qpdf", "remotes"))
# Install the development version from GitHub and build its vignettes
remotes::install_github("villegar/MetaPipe", build_vignettes = TRUE)
For details about the data structure and extended documentation, see the vignette Load Raw Data.
# Open the "load-raw-data" vignette shipped with MetaPipe
vignette("load-raw-data", package = "MetaPipe")
# Usage template: replace "FILE.CSV" and `...` with your own values
MetaPipe::load_raw(raw_data_filename = "FILE.CSV",
                   excluded_columns = c(...))
where raw_data_filename
is the name of the file containing the raw data; both absolute and relative paths are accepted. Next, the argument excluded_columns
is a vector containing the indices of the properties, e.g. c(2, 3, ..., M)
.
# F1 Seedling Ionomics dataset
# Locate the example CSV installed with the package; `mustWork = TRUE` makes
# system.file() raise an error if the file cannot be found
ionomics_path <- system.file("extdata",
                             "ionomics.csv",
                             package = "MetaPipe",
                             mustWork = TRUE)
ionomics <- MetaPipe::load_raw(ionomics_path)
# Preview the first 5 rows and 8 columns
knitr::kable(ionomics[1:5, 1:8])
For extended documentation, see the vignette Replace Missing Data.
# Open the "replace-missing-data" vignette shipped with MetaPipe
vignette("replace-missing-data", package = "MetaPipe")
# Usage template: `example_data` stands for your own raw data frame
MetaPipe::replace_missing(raw_data = example_data,
                          excluded_columns = c(2),
                          # Optional
                          out_prefix = "metapipe",
                          prop_na = 0.5,
                          replace_na = FALSE)
where raw_data
is a data frame containing the raw data, as described in Load Raw Data and excluded_columns
is a vector containing the indices of the properties, e.g. c(2, 3, ..., M)
. The other arguments are optional, out_prefix
is the prefix for output files, prop_na
is the proportion of NA values (used to drop traits), and replace_na
is a logical flag to indicate whether or not NA
s should be replaced by half of the minimum value within each variable.
# F1 Seedling Ionomics dataset
# `package` is given explicitly so the data set is found even when MetaPipe
# has not been attached with library()
data(ionomics, package = "MetaPipe") # Includes some missing data

# Call with `excluded_columns` passed positionally
ionomics_rev <- MetaPipe::replace_missing(ionomics, c(1, 2))

# Call with a custom `prop_na`
ionomics_rev <- MetaPipe::replace_missing(
  ionomics,
  excluded_columns = c(1, 2),
  prop_na = 0.025
)

# Call with `replace_na = TRUE`; this is the result previewed below
ionomics_rev <- MetaPipe::replace_missing(
  ionomics,
  excluded_columns = c(1, 2),
  replace_na = TRUE
)
# Preview the first 5 rows and 8 columns
knitr::kable(ionomics_rev[1:5, 1:8])
For extended documentation, see the vignette Assess Normality.
# Open the "assess-normality" vignette shipped with MetaPipe
vignette("assess-normality", package = "MetaPipe")
MetaPipe
assesses the normality of variables (traits) by performing a
Shapiro-Wilk test on the raw data (see
Load Raw Data and
Replace Missing Data). Based on whether
or not the data approximates a normal distribution, an array of transformations
will be computed, and the normality assessed one more time.
# Usage template: `raw_data` and `c(2, 3, ..., M)` are placeholders
MetaPipe::assess_normality(raw_data = raw_data,
                           excluded_columns = c(2, 3, ..., M),
                           # Optional
                           cpus = 1,
                           out_prefix = "metapipe",
                           plots_dir = tempdir(),
                           transf_vals = c(2, exp(1), 3, 4, 5, 6, 7, 8, 9, 10),
                           alpha = 0.05,
                           pareto_scaling = FALSE,
                           show_stats = TRUE)
where raw_data
is a data frame containing the raw data, as described in
Load Raw Data and excluded_columns
is a
vector containing the indices of the properties, e.g. c(2, 3, ..., M)
. The
other arguments are optional, cpus
is the number of cores to use, in other
words, the number of concurrent traits to process, out_prefix
is the prefix
for output files, plots_dir
is the output directory where the plots will be
stored, transf_vals
is a vector containing the transformation values to
be used when transforming the original data, alpha
is the significance level
for the Shapiro-Wilk tests, pareto_scaling
is a boolean flag to indicate
whether or not to scale the traits to the same scale, and show_stats
is a
boolean flag to show or hide some general statistics of the normalisation
process.
# F1 Seedling Ionomics dataset
# `package` is given explicitly so the data set is found even when MetaPipe
# has not been attached with library()
data(ionomics, package = "MetaPipe") # Includes some missing data
ionomics_rev <- MetaPipe::replace_missing(
  ionomics,
  excluded_columns = c(1, 2),
  replace_na = TRUE
)
# Plots are written to man/figures/ with the "README-ionomics" prefix
ionomics_normalised <- MetaPipe::assess_normality(
  ionomics_rev,
  excluded_columns = c(1, 2),
  transf_vals = c(2, exp(1)),
  out_prefix = "README-ionomics",
  plots_dir = "man/figures/",
  pareto_scaling = FALSE
)
# Extract the `norm` and `skew` components of the result
ionomics_norm <- ionomics_normalised$norm
ionomics_skew <- ionomics_normalised$skew
The function call to MetaPipe::assess_normality
will print a summary of the
transformations performed (if any), as well as an overview of the number of
traits that should be considered normal and skewed. Next, we can preview
some of the partial output of the normality assessment process:
# Normal traits (first 5 rows, all columns)
knitr::kable(ionomics_norm[1:5, ])
# Skewed traits (partial output: first 5 rows and 8 columns)
knitr::kable(ionomics_skew[1:5, 1:8])
Among the transformed traits, we have B11
and Na23
. Both seem to be
skewed but, after a simple transformation, can be classified as normalised traits.
qtl_scone <- function(x_data, cpus = 1, ...)
where x_data
# F1 Seedling Ionomics dataset
# `package` is given explicitly so the data set is found even when MetaPipe
# has not been attached with library()
data(father_riparia, package = "MetaPipe") # Genetic map
# Load cross file with genetic map and raw data for normal traits
x <- MetaPipe::read.cross(
  father_riparia,
  ionomics_norm,
  genotypes = c("nn", "np", "--")
)
# Fix the RNG seed before jittermap() so the result is reproducible
set.seed(123)
x <- qtl::jittermap(x)
x <- qtl::calc.genoprob(x, step = 1, error.prob = 0.001)
# `model` and `method` are passed through `...`
x_scone <- MetaPipe::qtl_scone(x, cpus = 1, model = "normal", method = "hk")
# PCA() is reached with `:::`, i.e. it is not exported by MetaPipe.
# The first column is dropped from both data sets before the call.
MetaPipe:::PCA(ionomics_norm[, -1])
MetaPipe:::PCA(ionomics_skew[, -1])
# Clean-up of files generated while running the chunks above.
# List all generated histograms
# NOTE(review): `pattern` is a regular expression, not a glob, so "HIST*"
# matches any name containing "HIS"; confirm this is the intended match.
ionomics_hists <- list.files("man/figures", "HIST*", full.names = TRUE)
# Filter displayed histograms: every file except those for Na23 and B11 is
# flagged for removal. grepl() is vectorised over the file names, so this
# also works when no histogram exists (sapply() returns an empty list in
# that case, which `!` cannot negate).
idx <- !grepl("Na23|B11", ionomics_hists)
filenames <- c(
  "metapipe_NA_raw_data.csv",
  ionomics_hists[idx],
  list.files(".", "README-ionomics*")
)
# Remove each file; the per-file results of file.remove() are kept in `output`
output <- lapply(filenames, file.remove)
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.