test-augmented.R
In OpenSpecy: Analyze, Process, Identify, and Share Raman and (FT)IR Spectra

#Libraries ----
library(dplyr)

#Functions ----
RandomlyAugmentDataBaseline <- function(Data) {
    order <- sample(1:5, 1)
    valuestosample <- c(-100:-1, 1:100)
    if(order == 1) Data*sample(valuestosample, 1)+sample(valuestosample, 1)
    else if(order == 2) sample(valuestosample, 1)*Data^2 + sample(valuestosample, 1)*Data +sample(valuestosample, 1)
    else if(order == 3) sample(valuestosample, 1)*Data^3 + sample(valuestosample, 1)* Data^2 + sample(valuestosample, 1)*Data+sample(valuestosample, 1)
    else if(order == 4) sample(valuestosample, 1)* Data^4 + sample(valuestosample, 1)* Data^3 + sample(valuestosample, 1)* Data^2 + sample(valuestosample, 1)*Data + sample(valuestosample, 1)
    else sample(valuestosample, 1)* Data^5 +sample(valuestosample, 1)*Data^4 + sample(valuestosample, 1)*Data^3 + sample(valuestosample, 1)*Data^2 + sample(valuestosample, 1)*Data+sample(valuestosample, 1)

}

augment_full <- function(Data){
    Data %>%
        select(-group) %>%
        dplyr::group_by(sample_name) %>%
        dplyr::arrange(wavenumber) %>%
        dplyr::mutate(intensity_og = minmax(intensity)) %>%
        dplyr::mutate(intensity_aug1 = minmax(intensity_og  + rnorm(length(wavenumber), mean = 0, sd = 1/sample(c(30:300), 1)))) %>%
        dplyr::mutate(intensity_aug2 = minmax(intensity_aug1  + minmax(RandomlyAugmentDataBaseline(wavenumber)))) %>%
        ungroup()
}

#Augment the library data ----
# Fetch current spectral library from https://osf.io/x7dpz/
get_lib()

# Load library into global environment
spec_lib <- load_lib()

#subset 100 of each library
raman_subset <- unique(spec_lib$raman$library$sample_name)[sample(1:length(unique(spec_lib$raman$library$sample_name)), 100, replace = F)]
ftir_subset <- unique(spec_lib$ftir$library$sample_name)[sample(1:length(unique(spec_lib$ftir$library$sample_name)), 100, replace = F)]

augmented_raman <- augment_full(spec_lib$raman$library %>%
                                    filter(sample_name %in% raman_subset)) %>%
                    mutate(intensity = intensity_aug2)

augmented_ftir <- augment_full(spec_lib$ftir$library %>%
                                   filter(sample_name %in% ftir_subset))%>%
                    mutate(intensity = intensity_aug2)


#Might consider adding this in at some point.
# Adjust spectral intensity
#raman_adj <- raman_hdpe %>%
#    adj_intens()

# Smooth and background-correct spectrum
raman_proc <- augmented_raman %>%
    group_by(sample_name) %>%
    smooth_intens(p = 3) #%>%
    subtr_baseline(degree = 8) %>%
    mutate()

ftir_proc <- augmented_ftir %>%
    group_by(sample_name) %>%
    smooth_intens() %>%
    subtr_baseline()

spectrum = raman_subset[100]

#Check that augmentation and cleaning performed as expected.
ggplot() +
    geom_line(data = augmented_raman %>%
                   filter(sample_name == spectrum),
               aes(x = wavenumber, y = intensity)
                   ) +
    geom_line(data = augmented_raman %>%
                    filter(sample_name == spectrum) %>%
                    smooth_intens(p = 3) %>%
                    subtr_baseline(degree = 8),
                aes(x = wavenumber, y = intensity))


#Check that cleaned match is to something reasonable.
augmented_raman %>%
    filter(sample_name == spectrum) %>%
    smooth_intens(p = 3) %>%
    subtr_baseline(degree = 8) %>%
    match_spec(library = spec_lib, which = "raman")

spectrum


#Match back to the library using the Open Specy functions to see if we can get back to the correct identity for the top match.
raman_identities <- c()
for(spectrum in raman_subset){

    identity <- unlist(spec_lib$raman$metadata[sample_name == spectrum, "spectrum_identity"])

    topmatch <- augmented_raman %>%
        filter(sample_name == spectrum) %>%
        smooth_intens(p = 3) %>%
        subtr_baseline(degree = 8) %>%
        match_spec(library = spec_lib, which = "raman", top_n = 1) %>%
        select(spectrum_identity) %>%
        unlist()

    raman_identities <- c(raman_identities, identity == topmatch)

}

#Needs to be above 80%
sum(raman_identities)/length(raman_identities)


#FTIR validation
ftir_identities <- c()
for(spectrum in ftir_subset){

    identity <- unlist(spec_lib$ftir$metadata[sample_name == spectrum, "spectrum_identity"])

    topmatch <- augmented_ftir %>%
        filter(sample_name == spectrum) %>%
        smooth_intens(p = 3) %>%
        subtr_baseline(degree = 8) %>%
        match_spec(library = spec_lib, which = "ftir", top_n = 1) %>%
        select(spectrum_identity) %>%
        unlist()

    ftir_identities <- c(ftir_identities, identity == topmatch)

}

#Needs to be above 80%
sum(ftir_identities)/length(ftir_identities)

Any scripts or data that you put into this service are public.

OpenSpecy documentation built on Sept. 14, 2024, 9:07 a.m.

rdrr.io home R language documentation Run R code online

CRAN packages Bioconductor packages R-Forge packages GitHub packages

Note that we can't provide technical support on individual packages. You should contact the package authors for that.

OpenSpecy
Analyze, Process, Identify, and Share Raman and (FT)IR Spectra

tests/validation/test-augmented.R
In OpenSpecy: Analyze, Process, Identify, and Share Raman and (FT)IR Spectra

Try the OpenSpecy package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

OpenSpecy Analyze, Process, Identify, and Share Raman and (FT)IR Spectra

tests/validation/test-augmented.R In OpenSpecy: Analyze, Process, Identify, and Share Raman and (FT)IR Spectra

Try the OpenSpecy package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

OpenSpecy
Analyze, Process, Identify, and Share Raman and (FT)IR Spectra

tests/validation/test-augmented.R
In OpenSpecy: Analyze, Process, Identify, and Share Raman and (FT)IR Spectra