#' List jTRACE available languages
#' @export jtrace_list_languages
#' @author Gonzalo Garcia-Castro <gonzalo.garciadecastro@upf.edu>
#' @returns A character vector listing the available languages in the jTRACE folder
#' @seealso \code{\link{jtrace_get_language}} for importing a language, and \code{\link{jtrace_create_language}} for creating a new language.
#' @references Strauss, T. J., Harris, H. D., & Magnuson, J. S. (2007). jTRACE: A reimplementation and extension of the TRACE model of speech perception and spoken word recognition. Behavior Research Methods, 39(1), 19-30.
#' @examples
#' \dontrun{
#' jtrace_list_languages()
#' }
jtrace_list_languages <- function(){
is_installed <- jtrace_is_installed()
if (!is_installed) stop("jTRACE is not installed, please run jtrace_install()")
dir_path <- file.path(system.file("jtrace", package = "jtracer", mustWork = TRUE), "languages")
x <- gsub(".xml", "", list.files(dir_path, pattern = ".xml"))
return(x)
}
#' Get jTRACE language
#' @export jtrace_get_language
#' @author Gonzalo Garcia-Castro <gonzalo.garciadecastro@upf.edu>
#' @importFrom rlang .data
#' @importFrom stats setNames
#' @importFrom utils download.file
#' @importFrom utils unzip
#' @importFrom XML xmlToDataFrame
#' @importFrom stringr str_extract
#' @seealso \code{\link{jtrace_list_languages}} for listing available languages, and \code{\link{jtrace_create_language}} for creating a new language.
#' @references Strauss, T. J., Harris, H. D., & Magnuson, J. S. (2007). jTRACE: A reimplementation and extension of the TRACE model of speech perception and spoken word recognition. Behavior Research Methods, 39(1), 19-30.
#' @param language_name Character vector of length 1 indicating the jTRACE language to import. Defaults to "default".
#' @return A list of data frames containing a data frame for the phonemes and their
#' scores across the seven features implemented in jTRACE (\code{features}),
#' a data frame containing the duration scalars of each phoneme for the even features
#' implemented in jTRACE (\code{duration_scalar}), and a data frame containing the
#' allophonic relations between the phonemes (\code{allophonic_relations}).
#' @examples
#' jtrace_get_language("default")
jtrace_get_language <- function(
language_name = "default"
){
is_installed <- jtrace_is_installed()
if (!is_installed) stop("jTRACE is not installed, please run jtrace_install()")
suppressWarnings({
language_list <- jtrace_list_languages()
if (length(language_list)<1) stop("There are no languages available")
if (is.null(language_name) || !(language_name %in% gsub(".xml", "", language_list))){
stop(paste0("Please, specify a valid language. Available languages are: ", paste0(language_list, collapse = ", ")))
}
if(length(language_name) > 1) stop("Please, specify just one lexicon")
language_list <- paste0(system.file("jtrace", "languages", package = "jtracer", mustWork = TRUE), .Platform$file.sep, language_name, ".xml") %>%
readLines(warn = FALSE) %>%
paste0(collapse = "") %>%
strsplit(split = "<phoneme>") %>%
unlist() %>%
as.list()
language_list[[1]] <- NULL
# symbols
phonemes <- lapply(language_list, function(x) {
str_extract(x, "(?<=\\<symbol\\>)(.*)(?=\\<\\/symbol>)") %>%
strsplit(split = " ") %>%
unlist()
}) %>%
unlist()
phonemes <- phonemes[!is.na(phonemes)]
# features
feature_names <- c("bur","voi", "con", "grd", "dif", "voc", "pow")
features <- lapply(language_list, function(x) {
y <- str_extract(x, "(?<=\\<features\\>)(.*)(?=\\<\\/features>)") %>%
strsplit(split = " ") %>%
unlist() %>%
as.numeric()
y <- as.data.frame(matrix(data = y[!is.na(y)], nrow = 9, ncol = 7))
colnames(y) <- feature_names
return(y)
})
features <- lapply(features, function(x) apply(X = x, MARGIN = 2, FUN = which.max))
features <- do.call(rbind, features)
row.names(features) <- phonemes
# duration scalar
duration_scalar <- lapply(language_list, function(x) {
y <- str_extract(x, "(?<=\\<durationScalar\\>)(.*)(?=\\<\\/durationScalar>)") %>%
strsplit(split = " ") %>%
unlist() %>%
as.numeric()
y <- as.data.frame(matrix(data = y[!is.na(y)], nrow = 1, ncol = 7))
})
duration_scalar <- do.call(rbind, duration_scalar)
row.names(duration_scalar) <- phonemes
colnames(duration_scalar) <- feature_names
# allophonic relations
allophonic_relations <- lapply(language_list, function(x) {
y <- str_extract(x, "(?<=\\<allophonicRelations\\>)(.*)(?=\\<\\/allophonicRelations>)") %>%
strsplit(split = " ") %>%
unlist()
})
allophonic_relations <- do.call(rbind, allophonic_relations)
allophonic_relations <- duration_scalar[!duration_scalar==""]
allophonic_relations <- duration_scalar=="true"
allophonic_relations <- array(allophonic_relations, dim = c(length(phonemes), length(phonemes)))
colnames(allophonic_relations) <- phonemes
row.names(allophonic_relations) <- phonemes
# merge everything
language <- list(
features = features,
duration_scalar = duration_scalar,
allophonic_relations = allophonic_relations
)
})
return(language)
}
#' Create jTRACE language (phonemic inventory)
#' @export jtrace_create_language
#' @author Gonzalo Garcia-Castro <gonzalo.garciadecastro@upf.edu>
#' @importFrom tidyr pivot_longer
#' @importFrom tidyr pivot_wider
#' @seealso \code{\link{jtrace_list_languages}} for listing available languages, and \code{\link{jtrace_get_language}} for importing a language.
#' @references Strauss, T. J., Harris, H. D., & Magnuson, J. S. (2007). jTRACE: A reimplementation and extension of the TRACE model of speech perception and spoken word recognition. Behavior Research Methods, 39(1), 19-30.
#' @param phonemes Character vector indicating the jTRACE notation of each phoneme. It must be the same length as the number of rows of the matrix or data frame introduced in \code{features}. This argument can be left NULL (default) if the matrix or data frame introduced in \code{features} has appropriate row names indicating the jTRACE notation of the phonemes.
#' @param features A M x N matrix or data frame (where M is the number of phonemes and N is 7, the number of features) that contains the values of the features (columns) for each phoneme (rows) with a score ranging from 0 to 9.
#' @param duration_scalar Matrix or data frame indicating the values of the duration scalar, with each phoneme as a row and each feature as a column. If NULL (default), all duration values are set to 1.
#' @param allophonic_relations Array or data frame with logical values indicating whether each combination of phonemes is an allophone, with phonemes are rows and columns. If NULL (default), no allophonic relations are specified.
#' @param language_name Name of the language that will be created.
#' @examples
#' \dontrun{
#' # first, we create a character vector with the phoneme symbols
#' p <- c("-", "a", "s", "d", "f", "g", "c")
#' # then we create a the features matrix
#' f <- data.frame(
#' bur = c(9, 6, 4, 3, 1, 1, 2),
#' voi = c(7, 4, 3, 3, 3, 3, 4),
#' con = c(8, 2, 4, 2, 5, 5, 6),
#' grd = c(4, 6, 1, 4, 6, 8, 6),
#' dif = c(6, 3, 2, 6, 6, 6, 7),
#' voc = c(3, 8, 1, 6, 6, 7, 4),
#' pow = c(6, 4, 1, 6, 1, 1, 5)
#' )
#' # now we create the language
#' jtrace_create_language(language_name = "my_language", phonemes = p, features = f)
#' }
jtrace_create_language <- function(
phonemes = NULL,
features,
duration_scalar = NULL,
allophonic_relations = NULL,
language_name
){
is_installed <- jtrace_is_installed()
if (!is_installed) stop("jTRACE is not installed, please run jtrace_install()")
# check params
if (is.null(language_name)) language_name <- readline()
if (is.null(duration_scalar)) duration_scalar <- matrix(1, nrow = nrow(features), ncol = 7)
if (is.null(allophonic_relations)) allophonic_relations <- array("false", dim = c(nrow(features), nrow(features)))
if (is.null(phonemes) & is.null(row.names(features))) {
stop("Phoneme notations must be introduced in the phonemes argument or as row names in the features matrix")
} else if (is.null(phonemes)) {
phonemes <- row.names(features)
}
if (any(duplicated(phonemes))) {
stop("Phonemes cannot be duplicated. Hint: sometimes, special characters are encoded into normal ones, leading to duplications.")
}
# headers
header_1 <- paste0(
"<?xml version='1.0' encoding='UTF-8'?><phonology xmlns='http://xml.netbeans.org/examples/targetNS'\nxmlns:xsi='http://www.w3.org/2001/XMLSchema-instance'\nxsi:schemaLocation='http://xml.netbeans.org/examples/targetNS file:",
system.file("jtrace", package = "jtracer"), "/Schema/jTRACESchema.xsd'>\n"
)
tag_name <- paste0("<languageName>", language_name, "</languageName>\n")
header_2 <- "<phonemes>\n"
# features
f <- lapply(split.data.frame(features, 1:nrow(features)), t)
f <- lapply(f, function(y){
cbind(y, setNames(sapply(1:9, function(x) x = as.numeric(x==y[,1])), 1:9))[,-1]
})
f <- lapply(f, function(x) c(t(x)))
# duration scalar
d <- lapply(split.data.frame(duration_scalar, 1:nrow(duration_scalar)), t)
d <- lapply(d, function(x) c(t(x)))
# allophonic relations
a <- lapply(split.data.frame(allophonic_relations, 1:nrow(allophonic_relations)), t)
a <- lapply(a, function(x) c(t(x)))
a <- lapply(a, function(x) ifelse(x, "true", "false"))
body <- as.list(phonemes)
s <- setNames(vector(mode = "list", length = length(phonemes)), phonemes)
for (i in 1:length(f)){
s[[i]]$symbol <- as.list(phonemes)[[i]]
s[[i]]$features <- paste0(f[[i]], ".0")
s[[i]]$duration_scalar <- ifelse(d[[i]] %in% c(0, 1), paste0(d[[i]], ".0"), d[[i]])
s[[i]]$allophonic_relations <- paste0(a[[i]])
body[[i]] <- paste0(
"<phoneme>\n",
"\t<symbol>", s[[i]]$symbol, "</symbol>\n",
"\t<features>", paste0(s[[i]]$features, collapse = " "), "</features>\n",
"\t<durationScalar>", paste0(s[[i]]$duration_scalar, collapse = " "), "</durationScalar>\n",
"\t<allophonicRelations>", paste0(s[[i]]$allophonic_relations, collapse = " "), "</allophonicRelations>\n",
"</phoneme>\n",
collapse = ""
)
}
body <- paste0(unlist(body), collapse = "")
# footer
footer <- "</phonemes>\n</phonology>"
# merge all
x <- c(header_1, tag_name, header_2, body, footer)
# output path
output_path <- paste0(system.file("jtrace", "languages", package = "jtracer", mustWork = TRUE), .Platform$file.sep, language_name, ".xml")
writeLines(text = paste0(x, collapse = ""), con = output_path)
}
#' Transcribe phonology from IPA to jTRACE notation
#' @export ipa_to_jtrace
#' @importFrom mgsub mgsub
#' @importFrom utils data
#' @param x A character vector with the phonological forms to be transcribed
#' @param keep_other Should symbols other than phonemes be kept in the
#' transcriptions? Defaults to FALSE
#' @details 1) If \code{keep_other}, special characters (symbols that do not
#' correspond to phonemes in the \code{phonemes} data set, such as apostrophes
#' or dots) are removed. 2) Colons (:) are replaced with the previous symbol,
#' since they are interpreted as a modifier of the duration of the previous
#' phoneme. 3) Pairwise replacements are performed according to the
#' \code{phonemes} data set.
#' @return A character vector with the jTRACE transcriptions of the provided
#' phonological forms
ipa_to_jtrace <- function(
x,
keep_other = FALSE
){
# to avoid issues with bindings in CMD CHECK
.new_env <- new.env(parent = emptyenv())
data("phonemes", envir = .new_env)
phonemes <- .new_env[["phonemes"]]
if (!keep_other) x <- gsub("<U+0361>| |\\.|<U+02C8>|'|\\\\|/", "", x)
x <- lapply(
as.list(x),
function(y){
y_split <- unlist(strsplit(y, split = ""))
if (any(grepl(":|<U+0306>", y))){
y_split[grep(":|<U+0306>", y_split)] <- y_split[grep(":|<U+0306>", y_split)-1]
}
y_collapsed <- paste0(y_split, collapse = "")
y_collapsed <- mgsub(y_collapsed, phonemes$ipa, phonemes$trace)
return(y_collapsed)
}
)
x <- unlist(x)
return(x)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.