R/glottoget.R
In glottospace: Language Mapping and Geospatial Analysis of Linguistic and Cultural Data

Documented in glottoget

#' Get glottodata from local path or online global databases
#'
#' Load locally stored glottodata, download databases from online sources, or load built-in demo data
#'
#' @param glottodata options are:
#' \itemize{
#' \item A filepath to locally stored glottodata or glottosubdata with file extension (.xlsx .xls .gpkg
#' .shp). See also: options meta and simplify.
#' \item "glottobase" - Default option, an spatially enhanced version of \href{https://glottolog.org/}{glottolog}. See
#' \link{glottobooster} for details. If glottodata = NULL, "glottobase" will be loaded.
#' \item "wals" - This is a spatially enhanced version of \href{https://wals.info/}{WALS}.
#' \item "dplace" - Not yet supported. This is a spatially enhanced version of \href{https://d-place.org/}{D-PLACE}.
#' \item "glottolog" - This is a restructured (non-spatial) version of \href{https://glottolog.org/}{glottolog}.
#' \item "glottospace" - A simple dataset with glottocodes and a geometry column. This
#' is a subset of all languages in \href{https://glottolog.org/}{glottolog} with
#' spatial coordinates.
#' \item "demodata" - Built-in artificial glottodata (included for demonstration and testing).
#' \item "demosubdata" - Built-in artificial glottosubdata (included for demonstration and testing)
#' }
#' @param meta In case 'glottodata' is demodata/demosubdata: by default, meta sheets are not loaded. Use meta=TRUE if you want to include them.
#' @param download By default internally stored versions of global databases are used. Specify download = TRUE in case you want to download the latest version from a remote server.
#' @param dirpath Optional, if you want to store a global CLDF dataset in a specific directory, or load it from a specific directory.
#' @param url Zenodo url, something like this: "https://zenodo.org/api/records/3260727"
#'
#' @family <glottodata>
#' @return A glottodata or glottosubdata object (a data.frame or list, depending on which glottodata is requested)
#' @export
#' @examples
#' \donttest{
#' glottoget("glottolog")
#' }
glottoget <- function(glottodata = NULL, meta = FALSE, download = FALSE, dirpath = NULL, url = NULL){
  if(!is.null(url)){
    glottoget_zenodo(url = url, dirpath = dirpath)
  } else if(is.null(glottodata)){
    glottodata <- glottoget_glottobase(download = download, dirpath = dirpath)
  } else if(glottodata == "glottobase"){
    glottodata <- glottoget_glottobase(download = download, dirpath = dirpath)
  } else if(glottodata == "glottolog"){
    glottodata <- glottoget_glottolog(download = download, dirpath = dirpath)
  } else if (glottodata == "glottospace"){
    glottodata <- glottoget_glottospace(download = download, dirpath = dirpath)
  } else if(glottodata == "demodata"){
    glottodata <- glottocreate_demodata(meta = meta)
  } else if(glottodata == "demosubdata"){
    glottodata <- glottocreate_demosubdata(meta = meta)
  } else if(glottodata == "wals"){
    glottodata <- glottoget_wals(download = download)
  } else if(glottodata == "dplace"){
    glottodata <- glottoget_dplace(download = download)
  } else if(tools::file_ext(glottodata) != ""){
    glottodata <- glottoget_path(filepath = glottodata)
  } else {message("Unable to load requested glottodata")}
return(glottodata)
}

#' Load glotto(sub)data from file
#'
#' Load glottodata/glottosubdata from a file
#'
#' @param filepath Path to glottodata file with extension (.xlsx .xls .gpkg .shp). If no filepath is specified, an artificial demo dataset will be created.
#' @family <glottodata>
#'
#' @noRd
#' @seealso glottosave
glottoget_path <- function(filepath = NULL){

  # metasheets <- names(glottocreate_metatables())

  if(tools::file_ext(filepath) == "xlsx" | tools::file_ext(filepath) == "xls"){
    sheetnames <- readxl::excel_sheets(filepath)

  glottodata <- base::lapply(X = sheetnames,
                         FUN = readxl::read_excel, path = filepath)
  names(glottodata) <- sheetnames

    } else if(tools::file_ext(filepath) == "csv"){
      glottodata <- utils::read.csv(filepath, header = TRUE, encoding = "UTF-8")
    } else if(tools::file_ext(filepath) == "gpkg" | tools::file_ext(filepath) == "shp"){
      glottodata <- sf::st_read(dsn = filepath)
    }

  return(glottodata)
}


#' Get glottobase reference data
#'
#' Downloads most recent glottolog data and transforms it. This 'glottobase' is used as reference dataset in several functions.
#'
#' @param ... Arguments to glottobooster
#'
#' @param download By default internally stored versions of global databases are used. Specify download = TRUE in case you want to download the latest version from a remote server.
#'
#' @noRd
#'
#' @examples
#' glottobase <- glottoget_glottobase()
glottoget_glottobase <- function(download = NULL, dirpath = NULL, ...){
  glottolog <- glottoget_glottolog(download = download, dirpath = dirpath)
  glottobase <- glottobooster(glottologdata = glottolog, ...)
  glottobase
}

#' Get glottospace reference data
#'
#' Get most recent glottolog data and turn it into the most elemental geoglot object (i.e. glottocodes + geometry column). This 'glottospace' is used as reference dataset in several functions.
#'
#'
#' @param download By default internally stored versions of global databases are used. Specify download = TRUE in case you want to download the latest version from a remote server.
#'
#' @noRd
#' @seealso glottospace_addcoords
#'
#'
#' @examples
#' glottospace <- glottoget_glottospace()
glottoget_glottospace <- function(download = NULL, dirpath = NULL){
  glottologdata <- glottoget_glottolog(download = download, dirpath = dirpath)
  glottologdata <- glottologdata %>% dplyr::rename("glottocode" = "id")
  glottospace <- glottospace_coords2sf(glottologdata)
  glottospace <- glottospace[,c("glottocode")]
  glottospace
}



#' Get metadata of remote cldf databases
#'
#' @param name Name of dataset "glottolog", "wals" or "dplace"
#' @param url Optional url
#'
#' @noRd
#'
glottoget_remotemeta <- function(name = NULL, url = NULL){
  rlang::check_installed("jsonlite", reason = "to use `glottoget_remotemeta()`")
  rlang::check_installed("xml2", reason = "to use `glottoget_remotemeta()`")
  if(is.null(name) & !is.null(url)){
    base_url <- url
  } else if(tolower(name) == "glottolog"){
    # Newest version is always uploaded here!
    base_url <- "https://zenodo.org/api/records/3260727"
  } else if(tolower(name) == "wals"){
    # Newest version is always uploaded here!
    base_url <- "https://zenodo.org/api/records/3606197"
  } else if(name == "dplace" | name == "d-place"){
    base_url <- "https://zenodo.org/api/records/3935419"
  } else if(!is.null(name) ){
    stop("Unable to download data from Zenodo. Unrecognized name argument. ")
  }

  remote <- suppressWarnings(jsonlite::stream_in(url(base_url)))

    version <- remote$metadata$version
    btime <- utils::timestamp()
    citation <- xml2::xml_text(xml2::read_html(charToRaw(remote$metadata$description), encoding = "UTF-8"))

    # v <- paste0(c("Version: ", version), collapse = "" )
    # c <- paste0(c("Citation: ", gsub(pattern = "\n", replacement = " ", x = citation)), collapse = "" )
    # b <- paste0(c("Built time: ", btime), collapse = "" )
    #
    # paste(c(v,c,b),  sep = "\n")

    metainfo <- paste0(c("version: ", version, "\n\n\n", citation, "\n\n", btime),  collapse = "")

    paste0("\\note{", metainfo, "}")

}



#' Download database from Zenodo
#'
#' @param name Name of a dataset, either glottolog, wals or dplace
#' @param dirpath Path to directory where files should be stored
#' @param url Zenodo url, something like this: "https://zenodo.org/api/records/3260727"
#'
#' @noRd
glottoget_zenodo <- function(name = NULL, url = NULL, dirpath = NULL){
  rlang::check_installed("jsonlite", reason = "to use `glottoget_zenodo()`")
  if(is.null(name) & !is.null(url)){
    base_url <- url
  } else if(tolower(name) == "glottolog"){
    # Newest version is always uploaded here!
    base_url <- "https://zenodo.org/api/records/3260727"
  } else if(tolower(name) == "wals"){
    # Newest version is always uploaded here!
    base_url <- "https://zenodo.org/api/records/3606197"
  } else if(name == "dplace" | name == "d-place"){
    base_url <- "https://zenodo.org/api/records/3935419"
  } else if(!is.null(name) ){
    stop("Unable to download data from Zenodo. Unrecognized name argument. ")
  }

  if(is.null(dirpath)){
    dirpath <- tempdir(check = TRUE)
    # if(dir.exists(dirpath)){unlink(x = dirpath, recursive = TRUE)}
  } else {
    if(dir.exists(dirpath)){stop("Directory already exists, please choose a different location.")}
    }

  remote <- suppressWarnings(jsonlite::stream_in(url(base_url)))
  url <- remote$files[[1]]$links[[1]]

  filepath <- file.path(tempfile())
  utils::download.file(file.path(url), destfile = filepath)
  utils::unzip(zipfile = filepath, exdir = dirpath)

  version <- remote$metadata$version

  if(!is.null(name)){
  if(tolower(name) == "glottolog"){
    message(paste0("Glottolog data downloaded (glottolog ", version,"). This is the most recent version available from ", base_url) )
  } else if(tolower(name) == "wals"){
    message(paste0("WALS data downloaded (wals-", version,"). This is the most recent version available from ", base_url) )
  } else if(tolower(name) == "dplace"){
    message(paste0("D-PLACE data downloaded (", version,"). This is the most recent version available from ", base_url) )
  }
  }
invisible(dirpath)

}

#' Load locally stored cldf data
#'
#' @param dirpath Path to directory where cldf data is stored
#' @param name Name of a dataset, either glottolog, wals or dplace
#' @param valuenames Do you want to add names instead of codes?
#'
#' @noRd
glottoget_cldf <- function(dirpath, name, valuenames = FALSE){
  if(!dir.exists(dirpath)){stop("Directory not found.")}

  cldf_metadata <- base::list.files(dirpath, pattern = "-metadata.json", recursive = TRUE)
  cldfid <- grep(pattern = tolower(name), x = tolower(cldf_metadata))
  mdpath <- normalizePath(file.path(dirpath, cldf_metadata[[cldfid]]))
  mddir <- normalizePath(base::dirname(mdpath))

  # Load languages file
  languoids <- normalizePath(file.path(mddir, "languages.csv"))
  languoids <- utils::read.csv(languoids, header = TRUE, encoding = "UTF-8")
  colnames(languoids) <- base::tolower(colnames(languoids))
  colnames(languoids)[which(colnames(languoids) == "id")] <- "lang_id"

  # Load values file
  values <- normalizePath(file.path(mddir, "values.csv"))
  values <- utils::read.csv(values, header = TRUE, encoding = "UTF-8")
  colnames(values) <- base::tolower(colnames(values))
  colnames(values)[colnames(values) == "language_id"] <- "lang_id"
  params <- unique(values$parameter_id)

  if(valuenames == TRUE) {    # Join values to codes by code_id
    codes <- normalizePath(file.path(mddir, "codes.csv"))
    codes <- utils::read.csv(codes, header = TRUE, encoding = "UTF-8")
    values <- values %>% dplyr::left_join(codes, by = c("code_id" = "ID") )
    values$value <- NULL
    values <- dplyr::rename(.data = values, value = .data$Name)
  }

  valsel <- values[, c("lang_id", "parameter_id", "value")]

  # Check duplicates:
  duplo <- valsel %>%
    dplyr::group_by(.data$lang_id, .data$parameter_id) %>%
    dplyr::summarise(n = dplyr::n(), .groups = "drop") %>%
    dplyr::filter(.data$n > 1L)

  if(nrow(duplo) == 0){
    valselpiv <- tidyr::pivot_wider(data = valsel, names_from = "parameter_id", values_from = "value")
  } else {
    message(paste0("For ", length(unique(duplo$lang_id)), " of the ", length(unique(valsel$lang_id)), " languages, a parameter has multiple values. These are summarized by taking the mean"))
    valselpiv <- tidyr::pivot_wider(data = valsel, names_from = "parameter_id", values_from = "value",
                                    values_fn = mean)
  }

  data <- dplyr::left_join(languoids, valselpiv, by = "lang_id")
  data <- base::subset(data, select = c("glottocode", params))
  data <- data[!purrr::is_empty(data$glottocode) & data$glottocode != "", ]

  data <- glottojoin_base(data)
  invisible(data)


}

Any scripts or data that you put into this service are public.

glottospace documentation built on April 12, 2022, 5:07 p.m.

rdrr.io home R language documentation Run R code online

CRAN packages Bioconductor packages R-Forge packages GitHub packages

Note that we can't provide technical support on individual packages. You should contact the package authors for that.

glottospace
Language Mapping and Geospatial Analysis of Linguistic and Cultural Data

R/glottoget.R
In glottospace: Language Mapping and Geospatial Analysis of Linguistic and Cultural Data

Defines functions glottoget_cldf glottoget_zenodo glottoget_remotemeta glottoget_glottospace glottoget_glottobase glottoget_path glottoget

Documented in glottoget

Try the glottospace package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

glottospace Language Mapping and Geospatial Analysis of Linguistic and Cultural Data

R/glottoget.R In glottospace: Language Mapping and Geospatial Analysis of Linguistic and Cultural Data

Defines functions glottoget_cldf glottoget_zenodo glottoget_remotemeta glottoget_glottospace glottoget_glottobase glottoget_path glottoget

Documented in glottoget

Try the glottospace package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

glottospace
Language Mapping and Geospatial Analysis of Linguistic and Cultural Data

R/glottoget.R
In glottospace: Language Mapping and Geospatial Analysis of Linguistic and Cultural Data