# Global knitr chunk options for this vignette. Chunks are shown but not
# evaluated (eval = FALSE), matching the "NOT RUN" banner.
knitr::opts_chunk$set(
  collapse = TRUE,
  comment = "#>",
  eval = FALSE
)
##### NOT RUN ######
This vignette will download an additional data set for training and testing. It will be stored within the root directory of the hardRain package. Run the R chunks to download and use this extra testing data. The data is hosted on an OSF project: https://osf.io/qpev7/
# Packages used throughout this vignette.
library(hardRain)
library(ggplot2)
library(tidyr)
NOTE: this will download 200 15-second wav files (252 MB) to a new directory.
Option 1. Download using the osfr package.
This will create a directory named 'extra_data'
in the hardRain root folder in your R library location.
You must have write permission to this folder.
# Download the extra training/testing data from the hardRain OSF project
# into a new 'extra_data' directory inside the installed hardRain package.
# Defines `hr_path` and `extra_data_dir`, which later chunks rely on.
library(osfr)

hr_path <- system.file(package = "hardRain")
extra_data_dir <- file.path(hr_path, "extra_data")

if (!dir.exists(extra_data_dir)) {
  # get osf project
  hardRain <- osf_retrieve_node("https://osf.io/qpev7/")

  # list files on hardRain osf project
  # (print() is required: a bare object name inside a braced block is not
  # auto-printed)
  ls_files <- osf_ls_files(x = hardRain)
  print(ls_files)

  # get id for extra_data folder
  id <- ls_files$id[ls_files$name == "extra_data"]
  if (length(id) != 1L) {
    # fail early with a clear message instead of passing an empty or
    # ambiguous id on to osf_retrieve_file()
    stop(
      "Expected exactly one 'extra_data' entry in the OSF project listing, ",
      "found ", length(id), ".",
      call. = FALSE
    )
  }

  # get directory
  extra_data_osf <- osf_retrieve_file(id)

  # download data to package root in a new 'extra_data' directory
  hardRain_osf2 <- osf_download(
    x = extra_data_osf,
    path = hr_path,
    recurse = TRUE,
    conflicts = "skip",
    progress = TRUE
  )
} else {
  # nothing is downloaded when the directory is already present
  warning("Directory already exists - check contents!")
}
Option 2. Download the complete data set as a zip from the OSF project (https://osf.io/qpev7/), store it in your desired location, and set the variable below to the location of the 'extra_data' folder.
# extra_data_dir <- "PATH/TO/extra_data"
Rain and test data:
- rain data: 100 wav files known to be hard rain
- test data: 100 wav files - 50 rain, 50 non-rain (clear)
Get the filenames of the training and test data:
# Full paths of the wav files in the 'rain' (training) folder of the
# extra data directory.
train <- list.files(
  path = file.path(extra_data_dir, "rain"),
  pattern = "\\.wav$",
  full.names = TRUE
)

# Full paths of the wav files in the 'test' folder.
test <- list.files(
  path = file.path(extra_data_dir, "test"),
  pattern = "\\.wav$",
  full.names = TRUE
)
Calculate the threshold using default settings - for two frequency bands
# Threshold values derived from the training (rain) files, using the
# default settings of getThreshold().
tr <- getThreshold(train)

# Show the result.
tr
Inspect the power spectral density (psd) and signal-to-noise (s2n) values for rain and non-rain files at specific frequency bands
# Compute the psd / s2n metrics for every test file.
met <- getMetrics(test)
head(met)

# bind into a data frame with the filenames
psd_s2n <- data.frame(filename = rownames(met), met)
rownames(psd_s2n) <- NULL
summary(psd_s2n)

# add test status: files with "Clear" in their name are the non-rain files.
# Match on the basename only (as the classification step does) so that a
# "Clear" appearing in a directory name cannot mislabel files.
psd_s2n$rain <- !grepl("Clear", basename(as.character(psd_s2n$filename)))
head(psd_s2n)

# reorganise into long format: one row per file, band and metric
metrics <- tidyr::pivot_longer(
  psd_s2n,
  band.1.psd:band.2.s2n,
  names_to = c("band", "metric"),
  names_pattern = "(band\\.1|band\\.2)\\.(.*)",
  values_to = "value"
)
head(metrics)
Draw a boxplot to see separation between rain and non-rain for each metric
# Boxplot of metric value per band, filled by rain status, with one panel
# per metric and an independent y scale in each panel.
ggplot(metrics, aes(x = band, y = value, fill = rain)) +
  geom_boxplot() +
  facet_wrap(~metric, scales = "free_y") +
  labs(x = "Band", y = "Metric value")
Classify the test files using the thresholds obtained above
# Classify the test files against the thresholds in `tr`, for both the
# "min" and "Q2" threshold types. ID is TRUE for files whose basename does
# not contain "Clear".
res <- classifyRain(
  test,
  thresh.vals = tr,
  threshold = c("min", "Q2"),
  ID = !grepl("Clear", basename(test))
)
head(res)
How many files identified as rain/non-rain?
# Cross-tabulate ID against the classification value for each threshold
# type.
count_cols <- c("ID", "value", "threshold")
table(res[count_cols])
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.