tests/validation/test-augmented.R

#Libraries ----
library(dplyr)

#Functions ----
RandomlyAugmentDataBaseline <- function(Data) {
    order <- sample(1:5, 1)
    valuestosample <- c(-100:-1, 1:100)
    if(order == 1) Data*sample(valuestosample, 1)+sample(valuestosample, 1)
    else if(order == 2) sample(valuestosample, 1)*Data^2 + sample(valuestosample, 1)*Data +sample(valuestosample, 1)
    else if(order == 3) sample(valuestosample, 1)*Data^3 + sample(valuestosample, 1)* Data^2 + sample(valuestosample, 1)*Data+sample(valuestosample, 1)
    else if(order == 4) sample(valuestosample, 1)* Data^4 + sample(valuestosample, 1)* Data^3 + sample(valuestosample, 1)* Data^2 + sample(valuestosample, 1)*Data + sample(valuestosample, 1)
    else sample(valuestosample, 1)* Data^5 +sample(valuestosample, 1)*Data^4 + sample(valuestosample, 1)*Data^3 + sample(valuestosample, 1)*Data^2 + sample(valuestosample, 1)*Data+sample(valuestosample, 1)

}

augment_full <- function(Data){
    Data %>%
        select(-group) %>%
        dplyr::group_by(sample_name) %>%
        dplyr::arrange(wavenumber) %>%
        dplyr::mutate(intensity_og = minmax(intensity)) %>%
        dplyr::mutate(intensity_aug1 = minmax(intensity_og  + rnorm(length(wavenumber), mean = 0, sd = 1/sample(c(30:300), 1)))) %>%
        dplyr::mutate(intensity_aug2 = minmax(intensity_aug1  + minmax(RandomlyAugmentDataBaseline(wavenumber)))) %>%
        ungroup()
}

#Augment the library data ----
# Fetch current spectral library from https://osf.io/x7dpz/
get_lib()

# Load library into global environment
spec_lib <- load_lib()

#subset 100 of each library
raman_subset <- unique(spec_lib$raman$library$sample_name)[sample(1:length(unique(spec_lib$raman$library$sample_name)), 100, replace = F)]
ftir_subset <- unique(spec_lib$ftir$library$sample_name)[sample(1:length(unique(spec_lib$ftir$library$sample_name)), 100, replace = F)]

augmented_raman <- augment_full(spec_lib$raman$library %>%
                                    filter(sample_name %in% raman_subset)) %>%
                    mutate(intensity = intensity_aug2)

augmented_ftir <- augment_full(spec_lib$ftir$library %>%
                                   filter(sample_name %in% ftir_subset))%>%
                    mutate(intensity = intensity_aug2)


#Might consider adding this in at some point.
# Adjust spectral intensity
#raman_adj <- raman_hdpe %>%
#    adj_intens()

# Smooth and background-correct spectrum
raman_proc <- augmented_raman %>%
    group_by(sample_name) %>%
    smooth_intens(p = 3) #%>%
    subtr_baseline(degree = 8) %>%
    mutate()

ftir_proc <- augmented_ftir %>%
    group_by(sample_name) %>%
    smooth_intens() %>%
    subtr_baseline()

spectrum = raman_subset[100]

#Check that augmentation and cleaning performed as expected.
ggplot() +
    geom_line(data = augmented_raman %>%
                   filter(sample_name == spectrum),
               aes(x = wavenumber, y = intensity)
                   ) +
    geom_line(data = augmented_raman %>%
                    filter(sample_name == spectrum) %>%
                    smooth_intens(p = 3) %>%
                    subtr_baseline(degree = 8),
                aes(x = wavenumber, y = intensity))


#Check that cleaned match is to something reasonable.
augmented_raman %>%
    filter(sample_name == spectrum) %>%
    smooth_intens(p = 3) %>%
    subtr_baseline(degree = 8) %>%
    match_spec(library = spec_lib, which = "raman")

spectrum


#Match back to the library using the Open Specy functions to see if we can get back to the correct identity for the top match.
raman_identities <- c()
for(spectrum in raman_subset){

    identity <- unlist(spec_lib$raman$metadata[sample_name == spectrum, "spectrum_identity"])

    topmatch <- augmented_raman %>%
        filter(sample_name == spectrum) %>%
        smooth_intens(p = 3) %>%
        subtr_baseline(degree = 8) %>%
        match_spec(library = spec_lib, which = "raman", top_n = 1) %>%
        select(spectrum_identity) %>%
        unlist()

    raman_identities <- c(raman_identities, identity == topmatch)

}

#Needs to be above 80%
sum(raman_identities)/length(raman_identities)


#FTIR validation
ftir_identities <- c()
for(spectrum in ftir_subset){

    identity <- unlist(spec_lib$ftir$metadata[sample_name == spectrum, "spectrum_identity"])

    topmatch <- augmented_ftir %>%
        filter(sample_name == spectrum) %>%
        smooth_intens(p = 3) %>%
        subtr_baseline(degree = 8) %>%
        match_spec(library = spec_lib, which = "ftir", top_n = 1) %>%
        select(spectrum_identity) %>%
        unlist()

    ftir_identities <- c(ftir_identities, identity == topmatch)

}

#Needs to be above 80%
sum(ftir_identities)/length(ftir_identities)

Try the OpenSpecy package in your browser

Any scripts or data that you put into this service are public.

OpenSpecy documentation built on Nov. 26, 2023, 1:09 a.m.