create_phrases_db <- function(item_df, phrase_segment_outlier_threshold = .65, ioi_threshold = .96) {
if("midi_file" %in% names(item_df)) {
phrase_df <- item_df %>%
dplyr::rowwise() %>%
dplyr::mutate(tmp_df = list(get_phrase_helper(abs_melody, durations, midi_file = midi_file, phrase_segment_outlier_threshold = phrase_segment_outlier_threshold, ioi_threshold = ioi_threshold))) %>%
dplyr::ungroup() %>%
dplyr::pull(tmp_df) %>%
dplyr::bind_rows()
} else if("musicxml_file" %in% names(item_df)) {
phrase_df <- item_df %>%
dplyr::rowwise() %>%
dplyr::mutate(tmp_df = list(get_phrase_helper(abs_melody, durations, musicxml_file = musicxml_file, phrase_segment_outlier_threshold = phrase_segment_outlier_threshold, ioi_threshold = ioi_threshold))) %>%
dplyr::ungroup() %>%
dplyr::pull(tmp_df) %>%
dplyr::bind_rows()
} else if("midi_file" %in% names(item_df) && "musicxml_file" %in% names(item_df)) {
phrase_df <- item_df %>%
dplyr::rowwise() %>%
dplyr::mutate(tmp_df = list(get_phrase_helper(abs_melody, durations, midi_file = midi_file, musicxml_file = musicxml_file, phrase_segment_outlier_threshold = phrase_segment_outlier_threshold, ioi_threshold = ioi_threshold))) %>%
dplyr::ungroup() %>%
dplyr::pull(tmp_df) %>%
dplyr::bind_rows()
} else {
phrase_df <- item_df %>%
dplyr::rowwise() %>%
dplyr::mutate(tmp_df = list(get_phrase_helper(abs_melody, durations, phrase_segment_outlier_threshold = phrase_segment_outlier_threshold, ioi_threshold = ioi_threshold))) %>%
dplyr::ungroup() %>%
dplyr::pull(tmp_df) %>%
dplyr::bind_rows()
}
phrase_df
}
get_phrase_helper <- function(abs_melody, durations, midi_file = NULL, musicxml_file = NULL, phrase_segment_outlier_threshold = .65, ioi_threshold = .96) {
stopifnot(
is.null.or(midi_file, is.scalar.character),
is.null.or(musicxml_file, is.scalar.character),
is.scalar.numeric(phrase_segment_outlier_threshold),
is.scalar.numeric(ioi_threshold)
)
# NB. TO FIX: The need for the tryCatch is only when using a files_db created from musicxml file.
# For some reason musicxml files currently don't add up often, with abs_melody and durations
tryCatch({
tibble::tibble(abs_melody = itembankr::str_mel_to_vector(abs_melody),
durations = itembankr::str_mel_to_vector(durations)) %>%
dplyr::mutate(onset = cumsum(durations)) %>%
segment_phrase(phrase_segment_outlier_threshold = phrase_segment_outlier_threshold,
ioi_threshold = ioi_threshold)
}, error = function(err) {
logging::logerror(err)
logging::logerror("Item removed probably because mismatch in abs_melody and durations.")
tibble::tibble(abs_melody = NA, durations = NA, onset = NA, ioi = NA, note_pos = NA, phrasend = NA, phrasbeg = NA)
}) %>%
dplyr::mutate(midi_file = !! midi_file,
musicxml_file = !! musicxml_file)
}
#' Segment a note track by adding phrasend and phrasbeg columns with boolean markers.
#'
#' @param note_track a data frame with an "onset" column
#' @param phrase_segment_outlier_threshold A threshold for selecting outliers for durations in phrase segmentation.
#' @param ioi_threshold A threshold for selecting outliers for iois in phrase segmentation.
#' @param as_string_df Should the format of the DF be a string DF?
#'
#' @return
#' @export
#'
#' @examples
segment_phrase <- function(note_track,
phrase_segment_outlier_threshold = .65,
ioi_threshold = .96,
as_string_df = TRUE) {
# (originally add_phrase_info from KF; see below)
note_track <- note_track %>% dplyr::mutate(ioi = c(0, diff(onset)), note_pos = dplyr::row_number())
bp <- note_track %>% dplyr::pull(ioi) %>% boxplot(plot = FALSE)
if(length(bp$out) > 0) {
outliers <- bp$out
outliers <- outliers[outliers > phrase_segment_outlier_threshold]
note_track <- note_track %>%
dplyr::mutate(phrasend = as.numeric(ioi > ioi_threshold | ioi %in% outliers),
phrasbeg = as.numeric(dplyr::lag(phrasend) | note_pos == 1))
note_track$phrasend[is.na(note_track$phrasend)] <- 0
note_track$phrasend[length(note_track$phrasend)] <- 1
} else {
note_track <- note_track %>%
dplyr::mutate(phrasend = c(rep(0, nrow(.)-1), 1),
phrasbeg = c(1, c(rep(0, nrow(.)-1))))
}
# Typically for creating an item_bank (i.e., here in itembankr, we want the output to be a string df)
# But in musicassessr, we don't
if(as_string_df) {
phrase_db <- note_track %>%
dplyr::mutate(phrasebeg_pos = dplyr::case_when(phrasbeg == 1 ~ note_pos, TRUE ~ NA),
phrasend_pos = dplyr::case_when(phrasend == 1 ~ note_pos, TRUE ~ NA))
phrase_db <- tibble::tibble(phrasebeg_pos = phrase_db$phrasebeg_pos[!is.na(phrase_db$phrasebeg_pos)],
phrasend_pos = phrase_db$phrasend_pos[!is.na(phrase_db$phrasend_pos)])
phrase_db <- purrr::pmap_dfr(phrase_db, function(phrasebeg_pos, phrasend_pos) {
note_track %>%
dplyr::filter(dplyr::between(note_pos, phrasebeg_pos, phrasend_pos)) %>%
musicassessr::to_string_df() %>%
dplyr::select(-c(note_pos, phrasend, phrasbeg))
})
return(phrase_db)
} else {
return(note_track)
}
}
add_phrase_info <- function(note_track, end_track) {
note_track <- note_track %>% dplyr::mutate(onset = time/1000)
final_ioi <- diff(c(note_track$onset[length(note_track$onset)], end_track/1000))
note_track <- note_track %>% dplyr::mutate(ioi = round(c(diff(onset), final_ioi), 2),
note_pos = 1:dplyr::n())
outliers <- note_track %>% dplyr::pull(ioi) %>% boxplot(plot = FALSE) %>% `[[`("out")
#outliers <- outliers[outliers > .65]
note_track %>%
dplyr::mutate(phrasend = as.numeric(ioi %in% outliers | note_pos == length(note_pos)),
phrasbeg = as.numeric(dplyr::lag(phrasend) | note_pos == 1))
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.