# Factorize study variables
# NOTE: this step is entirely commented out; the code below is kept for
# reference only and does not run.
# TODO: what about actual numerical variables?
# studvars <- c(unlist(STUDY_VARIABLES), PROFILING_VARIABLES, REPLICATE_VARIABLES)
# for (studvar in studvars){
#   biocrates[[studvar]] <- factor(biocrates[[studvar]])
# }
The imported data was restructured as follows:
# Describe the restructuring steps applied to the imported data.
#
# Each step is one Markdown bullet. The bullets are kept as separate vector
# elements and joined with newlines so that every item starts on its own line
# (a Markdown list item is only recognised at the start of a line).
# NOTE(review): the output is emitted with cat(); this presumably relies on the
# chunk being rendered with results = "asis" -- confirm in the chunk options.
restructuring_steps <- c(
  paste0(
    "Convert to **long table** (or ",
    "[molten dataset](http://dx.doi.org/10.18637/jss.v059.i10)). ",
    "Each row includes only one measurement, i.e. the data of one compound ",
    "in one sample and all corresponding information."
  ),
  paste0(
    "Add **Sample Identification \"Blank\"** for blank samples, as they are ",
    "empty when exported from MetIDQ."
  )
)

# The pooled QC sample type is only described for Biocrates data.
if (IS_BIOCRATES) {
  restructuring_steps <- c(
    restructuring_steps,
    sprintf(
      paste0(
        "Add **Sample Type \"%s\"** for pooled QC samples. ",
        "%s samples are identified by any occurrence of the term \"pool\" ",
        "(case-insensitive) in the column %s (parameter `pool_indicator`). ",
        "In contrast, **\"Reference QC\"** will refer to Biocrates' ",
        "QC Level 2 samples."
      ),
      SAMPLE_TYPE_POOLED_QC,
      SAMPLE_TYPE_POOLED_QC,
      params$pool_indicator
    )
  )
}

restructuring_steps <- c(
  restructuring_steps,
  paste0(
    "Add **Sequence Position**, i.e. sample acquisition order. ",
    "This position is calculated from MetIDQ's Well Position, as this one ",
    "depicts a position by ",
    "[row-major order](https://en.wikipedia.org/wiki/Row-_and_column-major_order) ",
    "which does not reflect the actual acquisition order."
  ),
  paste0(
    "Add **Well Coordinates**, indicating the two dimensional position on ",
    "the well plate. This position is calculated from MetIDQ's Well Position ",
    "and indicates rows by letters and columns by numbers."
  ),
  paste0(
    "Add a **unique Sample Name**, which is a combination of Sample ",
    "Identifier, Sequence Position and potentially Batch (if available). ",
    "These names assure unambiguous identification and analysis of ",
    "measurements within or between several batches (e.g. in particular ",
    "technical samples such as calibration standard or reference QC samples ",
    "share sample identifications)."
  ),
  paste0(
    "Remove **nonessential columns**, as some of them currently interfere ",
    "with proper data merging or processing later. This includes (for now, ",
    "to be optimized/reduced): Plate Bar Code, Sample Bar Code, Submission ",
    "Name, Material, Plate Production No., Plate Note, Run Number, Injection ",
    "Number, Measurement Time, Sample Description, Collection Date, ",
    "Org. Info and OP."
  ),
  paste0(
    "Unify **missing values**, i.e. \"\" (empty field), \"0\" (zero) and ",
    "\"NA\" (not available) are converted to \"NA\"."
  ),
  paste0(
    "**Factorize Compounds** with level order based on compound class and ",
    "compound name."
  )
)

# Surround the list with blank lines so it is separated from adjacent output.
text <- paste0("\n", paste0("* ", restructuring_steps, collapse = "\n"), "\n")
cat(text)
# Filter predefined samples
#
# Removes every sample whose Sample.Identification matches the regular
# expression given by the `filter_regex` parameter, and shows a table of the
# samples that were removed (an empty table if no filter is applied).
removed_samples <- data.frame()

# Apply the filter only for a single, non-missing, non-empty value. An absent,
# NULL or NA parameter would otherwise produce a zero-length/NA condition and
# abort the report. `[[` is used for an exact (non-partial) name lookup.
filter_regex <- params[["filter_regex"]]
has_filter <- length(filter_regex) == 1L &&
  !is.na(filter_regex) &&
  nzchar(filter_regex)

if (has_filter) {
  # Print samples to be removed
  cat("### Sample filtering\n")
  cat(paste0("Samples removed due to regular expression filter `",
             filter_regex, "`:\n"))

  # NOTE(review): assumes a Batch column is always present -- confirm.
  removed_samples <- biocrates %>%
    filter(grepl(filter_regex, Sample.Identification)) %>%
    select(Sample.Identification, Batch) %>%
    unique() %>%
    arrange(Batch, Sample.Identification)

  # Remove samples
  biocrates <- biocrates %>%
    filter(!grepl(filter_regex, Sample.Identification))
}

easy_datatable(removed_samples, caption = "Removed samples",
               show_type = "statistics")
# Turn Compound into a factor whose levels follow compound class (when a
# Class column exists) and then compound name.
compound_levels <- (
  if ("Class" %in% names(biocrates)) {
    arrange(biocrates, Class, Compound)
  } else {
    arrange(biocrates, Compound)
  }
) %>%
  # Keep each compound once, in sorted order of first appearance
  distinct(Compound) %>%
  pull(Compound)

biocrates$Compound <- factor(biocrates$Compound, levels = compound_levels)
# Sample type availability
#
# For each sample type, record whether it occurs in the data at all
# (AVAILABLE_*) and whether more than one distinct sample of that type
# exists (ENOUGH_*).
# TODO: count per batch?

# Count the distinct Sample.Name values among rows of `data` whose
# Sample.Type equals `sample_type`. Rows with a missing Sample.Type are
# ignored (which() drops NA comparisons).
count_samples_of_type <- function(data, sample_type) {
  rows_of_type <- which(data[["Sample.Type"]] == sample_type)
  length(unique(data[["Sample.Name"]][rows_of_type]))
}

# Are there any SAMPLE_TYPE_BIOLOGICAL samples? More than one?
AVAILABLE_BIOLOGICAL <- SAMPLE_TYPE_BIOLOGICAL %in% biocrates$Sample.Type
ENOUGH_BIOLOGICAL <-
  count_samples_of_type(biocrates, SAMPLE_TYPE_BIOLOGICAL) > 1

# Are there any SAMPLE_TYPE_POOLED_QC samples? More than one?
AVAILABLE_POOLED_QC <- SAMPLE_TYPE_POOLED_QC %in% biocrates$Sample.Type
ENOUGH_POOLED_QC <-
  count_samples_of_type(biocrates, SAMPLE_TYPE_POOLED_QC) > 1

# Are there any ENV$SAMPLE_TYPE_REFERENCE_QC samples? More than one?
AVAILABLE_REFERENCE_QC <- ENV$SAMPLE_TYPE_REFERENCE_QC %in% biocrates$Sample.Type
ENOUGH_REFERENCE_QC <-
  count_samples_of_type(biocrates, ENV$SAMPLE_TYPE_REFERENCE_QC) > 1

# Are there any SAMPLE_TYPE_BLANK samples? More than one?
AVAILABLE_BLANK <- SAMPLE_TYPE_BLANK %in% biocrates$Sample.Type
ENOUGH_BLANK <-
  count_samples_of_type(biocrates, SAMPLE_TYPE_BLANK) > 1
# Set label type for plots (ordered by preference)
#
# Stores the name of the first candidate column that exists in the data as
# PLOT_SAMPLE_LABEL in ENV.
# TODO: could be a parameter
label_cols <- c(
  COLUMN_WELL_POSITION,
  COLUMN_SEQUENCE_POSITION,
  COLUMN_WELL_COORDINATES,
  COLUMN_SAMPLE_NAME
)

# Index of the most preferred column that is present; NA if none is present
best_label_col <- which(label_cols %in% names(biocrates))[1]

# Make the no-match case visible instead of silently storing NA as the label
if (is.na(best_label_col)) {
  warning(
    "None of the candidate label columns (",
    paste(label_cols, collapse = ", "),
    ") is present in the data; PLOT_SAMPLE_LABEL is set to NA.",
    call. = FALSE
  )
}

assign("PLOT_SAMPLE_LABEL", label_cols[best_label_col], ENV)
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.