R/form_schema_ext.R

Defines functions form_schema_ext

Documented in form_schema_ext

#' Show the extended schema of one form.
#'
#' `r lifecycle::badge("experimental")`
#'
#' @details ODK Central has introduced a new API endpoint in version 0.8 which
#' returns a parsed and flattened list of fields. This replaces the nested
#' form schema which is challenging to parse. This list is returned
#' by \code{\link{form_schema}}.
#'
#' However this still misses important elements, in particular \code{labels} and
#' \code{choice_lists}.
#'
#' \code{\link{form_schema_ext}} returns the same object as
#' \code{\link{form_schema}}
#' adding \code{labels} and \code{choice lists} in all languages available.
#' This is done by using the return object from \code{\link{form_xml}}.
#'
#' It has the exact function signature as \code{\link{form_schema}}.
#' In that sense, any call to \code{\link{form_schema}} can be replaced
#' by \code{\link{form_schema_ext}}
#'
#' This function, however, has been prepared with ODK Central version 0.8 or
#' higher. If you use it with an earlier version, a warning will be given.
#'
#'
#' @param flatten Whether to flatten the resulting list of lists (\code{TRUE})
#'   or not (\code{FALSE}, default). Only applies to ODK Central version < 0.8.
#' @param odata Whether to sanitise the field names to match the way they will
#'   be outputted for OData. While the original field names as given in the
#'   XForms definition may be used as-is for CSV output, OData has some
#'   restrictions related to the domain-qualified identifier syntax it uses.
#'   Only applies to ODK Central version < 0.8.
#'   Default: \code{FALSE}.
#' @param parse Whether to parse the form schema into a tibble of form field
#'   type and name. This uses \code{\link{form_schema_parse}} internally.
#'   If used together with `flatten=TRUE`, \code{\link{form_schema}} will raise
#'   a warning and return the unparsed, flattened form schema.
#'   Only applies to ODK Central version < 0.8.
#'   Default: TRUE.
#' @template param-pid
#' @template param-fid
#' @template param-url
#' @template param-auth
#' @template param-retries
#' @template param-odkcv
#' @template param-verbose
#' @return A tibble containing the form definition.
#'   For ODK Central 0.8, and with default parameters
#'   (\code{parse=TRUE}) for ODK Central 0.7, \code{\link{form_schema}} returns
#'   a tibble with the columns:
#'
#'   \itemize{
#'   \item \code{name} The field name as given in the form schema.
#'   \item \code{type} The field type, e.g. "string", "select1", etc.
#'   \item \code{path} The XForms path of the field,
#'   \item \code{ruodk_name} The predicted field name as generated by
#'     \code{\link{odata_submission_get}}, prefixed by the path, additionally
#'     cleaned with \code{\link[janitor]{make_clean_names}} to match the
#'     cleaned column names from \code{\link{odata_submission_rectangle}}.
#'   \item \code{label} The field label as given in the form schema.
#'     If specific languages are available,
#'     this column will return the \code{default} language or it will be empty
#'     if this is not specified.
#'   \item \code{label_lang} The field label in languange \emph{_lang} as
#'     given in the form schema.
#'   \item \code{choices} A list of lists containing at least \code{values} and,
#'     if available, \code{labels} of the choices as given in the form schema.
#'     If specific languages are available, this column will return the
#'     \code{default} language or it will be empty if this is not specified.
#'     Please notice that whenever choice filters are applied, this will return
#'     the unfiltered choice list.
#'   \item \code{choices_lang} A list of lists containing at least
#'     \code{values} and, if available, \code{labels} of the choices in language
#'     \emph{_lang} as given in the form schema.
#'     Please notice that whenever choice filters are applied, this will return
#'     the unfiltered choice list.
#'
#'   }
# nolint start
#' @seealso \url{https://docs.getodk.org/central-api-form-management/#getting-form-schema-fields}
#' @seealso \url{https://docs.getodk.org/central-api-form-management/#retrieving-form-xml}
# nolint end
#' @family form-management
#' @export
#' @examples
#' \dontrun{
#' # See vignette("setup") for setup and authentication options
#' # ruODK::ru_setup(svc = "....svc", un = "me@email.com", pw = "...")
#'
#' # With current ODK Central (>0.7)
#' # get extended schema:
#' fsx <- form_schema_ext()
#'
#' # print choice list in english:
#' fsx[fsx$name == "test_yn", "choices_english_(en)"][[1]]
#'
#' # view the extended schema:
#' fsx
#' }
form_schema_ext <- function(flatten = FALSE,
                            odata = FALSE,
                            parse = TRUE,
                            pid = get_default_pid(),
                            fid = get_default_fid(),
                            url = get_default_url(),
                            un = get_default_un(),
                            pw = get_default_pw(),
                            odkc_version = get_default_odkc_version(),
                            retries = get_retries(),
                            verbose = get_ru_verbose()) {
  # version warning
  # nocov start
  if (semver_lt(odkc_version, "0.8.0")) {
    # odkc_version < 0.8
    "Form Schema Extended works better with ODK Central 0.8 and above" %>%
      ru_msg_warn()
  }
  # nocov end

  # gets basic schema
  frm_schema <- form_schema(
    flatten = flatten,
    odata = odata,
    parse = parse,
    url = url,
    pid = pid,
    fid = fid,
    un = un,
    pw = pw,
    odkc_version = odkc_version,
    retries = retries,
    verbose = verbose
  )

  # get xml representation
  frm_xml <- form_xml(
    parse = FALSE,
    url = url,
    pid = pid,
    fid = fid,
    un = un,
    pw = pw,
    retries = retries
  ) %>%
    xml2::xml_ns_strip()

  ### parse translations
  all_translations <- xml2::xml_find_all(frm_xml, "//text")
  all_translations_ids <- xml2::xml_attr(all_translations, "id")

  # initialize dataframe
  extension <- data.frame(
    path = character(0),
    label = character(0),
    stringsAsFactors = FALSE
  )


  ### PART 1: parse labels
  raw_labels <- xml2::xml_find_all(frm_xml, "//label")

  # iterate through labels
  for (i in seq_along(raw_labels)) {
    ## read label
    this_rawlabel <- raw_labels[i]

    ## path
    # get ref from parent, without leading "/data"
    this_path <- sub(
      "/data", "",
      xml2::xml_attr(xml2::xml_parent(this_rawlabel), "ref")
    )

    # ensure this is a valid path
    if (!is.na(this_path)) {
      # add new empty row:
      extension[nrow(extension) + 1, ] <- rep(NA, ncol(extension))

      # add path
      extension[nrow(extension), "path"] <- this_path

      # first check if label is mapped with a translation function
      has_translation <- xml2::xml_has_attr(this_rawlabel, "ref")

      if (has_translation) {
        # find all translations related to this path:
        id <- sub(
          "')",
          "",
          sub("jr:itext\\('", "", xml2::xml_attr(this_rawlabel, "ref"))
        )
        translations <- all_translations[
          all_translations_ids == id
        ]

        # iterate through translations
        for (j in seq_along(translations)) {
          this_translation <- translations[j]

          # First check this is a regular text labels.
          #
          # Questions in ODK can have video, image and audio "labels",
          # which will be skipped. This is identified by the presence of
          # the 'form' attribute:
          is_regular_label <- !xml2::xml_has_attr(
            xml2::xml_find_first(this_translation, "./value"), "form"
          )

          if (is_regular_label) {
            # read the parent node to identify language:
            translation_parent <- xml2::xml_parent(this_translation)
            this_lang <- gsub(" ", "_", tolower(xml2::xml_attr(
              translation_parent, "lang"
            )))

            # decide if 'default' language or specific language
            if (this_lang == "default") {
              # if 'default' language, save under column 'label':
              extension[nrow(extension), "label"] <- xml2::xml_text(
                xml2::xml_find_first(this_translation, "./value")
              )
            } else {
              # check if language already exists in the datafram
              if (!(paste0("label_", this_lang) %in% colnames(extension))) {
                # if not, create new column
                extension <- cbind(
                  extension,
                  data.frame(
                    new_lang = rep(NA, nrow(extension))
                  )
                )
                colnames(extension)[ncol(extension)] <- paste0(
                  "label_", this_lang
                )
              }

              # add the first value content of the translation
              extension[
                nrow(extension),
                paste0("label_", this_lang)
              ] <- xml2::xml_text(
                xml2::xml_find_first(this_translation, "./value")
              )
            }
          }
        }
      } else {
        # extract content
        extension[nrow(extension), "label"] <- xml2::xml_text(this_rawlabel)
      }

      ### PART 1.1: parse choice labels
      ## check existence of  choice list:
      choice_items <- xml2::xml_find_all(
        xml2::xml_parent(this_rawlabel), "./item"
      )

      if (length(choice_items) > 0) {
        # check if 'choices' column already exist
        if (!("choices" %in% colnames(extension))) {
          # if not, create new column
          extension <- cbind(extension, data.frame(
            choices = rep(NA, nrow(extension))
          ))
        }

        # initialize lists
        choice_values <- list()
        choice_labels <- list()

        # iterate through choice list:
        for (jj in seq_along(choice_items)) {
          ## read choice item
          this_choiceitem <- choice_items[jj]

          # value
          this_choicevalue <- xml2::xml_text(
            xml2::xml_find_first(this_choiceitem, "./value")
          )
          choice_values[jj] <- this_choicevalue

          # raw label
          this_rawchoicelabel <- xml2::xml_find_first(
            this_choiceitem, "./label"
          )

          # first check if choice label is mapped with a translation function
          has_translation_choice <- xml2::xml_has_attr(
            this_rawchoicelabel, "ref"
          )

          if (has_translation_choice) {
            id_choice <- sub(
              "')",
              "",
              sub(
                "jr:itext\\('",
                "",
                xml2::xml_attr(this_rawchoicelabel, "ref")
              )
            )

            choice_translations <- all_translations[
              all_translations_ids == id_choice
            ]


            # iterate through choice translations
            for (kk in seq_along(choice_translations)) {
              # read translation
              this_choicetranslation <- choice_translations[kk]

              # first check this is a regular text labels.
              # Questions in ODK can have video, image and audio "labels",
              # which will be skipped.
              # This is identified by the presence of the 'form' attribute:
              is_regular_choicelabel <- !xml2::xml_has_attr(
                xml2::xml_find_first(this_choicetranslation, "./value"), "form"
              )

              if (is_regular_choicelabel) {
                # read the parent node to identify language:
                choice_translation_parent <- xml2::xml_parent(
                  this_choicetranslation
                )
                this_choicelang <- gsub(" ", "_", tolower(xml2::xml_attr(
                  choice_translation_parent, "lang"
                )))

                # decide if 'default' language or specific language
                if (this_choicelang == "default") {
                  # if 'default' language, save under 'choice':
                  choice_labels[["base"]][jj] <- xml2::xml_text(
                    xml2::xml_find_first(this_choicetranslation, "./value")
                  )
                } else {
                  # check if language already exists in the dataframe
                  if (!(paste0("choices_", this_choicelang) %in%
                    colnames(extension))) {
                    # if not, create new column
                    extension <- cbind(extension, data.frame(
                      new_choicelang = rep(NA, nrow(extension))
                    ))
                    colnames(extension)[ncol(extension)] <- paste0(
                      "choices_", this_choicelang
                    )
                  }

                  # add the first value content of the translation
                  choice_labels[[paste0(
                    "choices_",
                    this_choicelang
                  )]][jj] <- xml2::xml_text(
                    xml2::xml_find_first(
                      this_choicetranslation, "./value"
                    )
                  )
                }
              }
            }
          } else {
            choice_labels[["base"]][jj] <- xml2::xml_text(this_rawchoicelabel)
          }
        }

        # add to the extended table
        for (this_choicelang in names(choice_labels)) {
          these_choicelabels <- choice_labels[[this_choicelang]]

          if (this_choicelang == "base") {
            this_choicelang_colname <- "choices"
          } else {
            this_choicelang_colname <- this_choicelang
          }

          extension[nrow(extension), this_choicelang_colname] <- list(list(list(
            values = unlist(choice_values),
            labels = unlist(these_choicelabels)
          )))
        }
      }

      ### PART 1.2: parse complex choice labels,
      # i.e. in the presence of choice filters

      ## check existence of  choice itemset:
      choice_itemset <- xml2::xml_find_all(
        xml2::xml_parent(this_rawlabel), "./itemset"
      )


      if (length(choice_itemset) > 0) {
        # identify value node
        choicevalue_node <- xml2::xml_attr(
          xml2::xml_find_first(choice_itemset, "./value"),
          "ref"
        )

        # identify label node
        choicelabel_node <- xml2::xml_attr(
          xml2::xml_find_first(choice_itemset, "./label"),
          "ref"
        )

        # check if labels have translations
        has_translation_choice <- grepl("jr:itext", choicelabel_node)

        if (has_translation_choice) {
          # update choicelabel_node
          choicelabel_node <- sub(
            ")",
            "",
            sub(
              "jr:itext\\(",
              "",
              choicelabel_node
            )
          )
        }

        # extract content from the itemset:
        choice_nodeset <- xml2::xml_attr(choice_itemset, "nodeset")
        choice_nodeset_id <- substr(
          choice_nodeset,
          regexpr("\\(", choice_nodeset)[1] + 1,
          regexpr(")", choice_nodeset)[1] - 1
        )
        choice_itemset_content <- xml2::xml_find_all(
          frm_xml,
          paste0(
            ".//instance[@id=",
            choice_nodeset_id,
            "]//item"
          )
        )


        # check if 'choices' column already exist
        if (!("choices" %in% colnames(extension))) {
          # if not, create new column
          extension <- cbind(extension, data.frame(
            choices = rep(NA, nrow(extension))
          ))
        }

        # initialize lists
        choice_values <- list()
        choice_labels <- list()

        # iterate through choice list:
        for (jj in seq_along(choice_itemset_content)) {
          ## read choice item
          this_choiceitem <- choice_itemset_content[jj]

          # value
          this_choicevalue <- xml2::xml_text(
            xml2::xml_find_first(
              this_choiceitem,
              paste0("./", choicevalue_node)
            )
          )
          choice_values[jj] <- this_choicevalue

          if (has_translation_choice) {
            id_choice <- xml2::xml_text(
              xml2::xml_find_first(
                this_choiceitem,
                paste0("./", choicelabel_node)
              )
            )

            choice_translations <- all_translations[
              all_translations_ids == id_choice
            ]


            # iterate through choice translations
            for (kk in seq_along(choice_translations)) {
              # read translation
              this_choicetranslation <- choice_translations[kk]

              # first check this is a regular text labels.
              # Questions in ODK can have video, image and audio "labels",
              # which will be skipped.
              # This is identified by the presence of the 'form' attribute:
              is_regular_choicelabel <- !xml2::xml_has_attr(
                xml2::xml_find_first(this_choicetranslation, "./value"), "form"
              )

              if (is_regular_choicelabel) {
                # read the parent node to identify language:
                choice_translation_parent <- xml2::xml_parent(
                  this_choicetranslation
                )
                this_choicelang <- gsub(" ", "_", tolower(xml2::xml_attr(
                  choice_translation_parent, "lang"
                )))

                # decide if 'default' language or specific language
                if (this_choicelang == "default") {
                  # if 'default' language, save under 'choice':
                  choice_labels[["base"]][jj] <- xml2::xml_text(
                    xml2::xml_find_first(this_choicetranslation, "./value")
                  )
                } else {
                  # check if language already exists in the dataframe
                  if (!(paste0("choices_", this_choicelang) %in%
                    colnames(extension))) {
                    # if not, create new column
                    extension <- cbind(extension, data.frame(
                      new_choicelang = rep(NA, nrow(extension))
                    ))
                    colnames(extension)[ncol(extension)] <- paste0(
                      "choices_", this_choicelang
                    )
                  }

                  # add the first value content of the translation
                  choice_labels[[paste0(
                    "choices_",
                    this_choicelang
                  )]][jj] <- xml2::xml_text(
                    xml2::xml_find_first(
                      this_choicetranslation, "./value"
                    )
                  )
                }
              }
            }
          } else {
            choice_labels[["base"]][jj] <- xml2::xml_text(
              xml2::xml_find_first(
                this_choiceitem,
                paste0("./", choicelabel_node)
              )
            )
          }
        }

        # add to the extended table
        for (this_choicelang in names(choice_labels)) {
          these_choicelabels <- choice_labels[[this_choicelang]]

          if (this_choicelang == "base") {
            this_choicelang_colname <- "choices"
          } else {
            this_choicelang_colname <- this_choicelang
          }

          extension[nrow(extension), this_choicelang_colname] <- list(list(list(
            values = unlist(choice_values),
            labels = unlist(these_choicelabels)
          )))
        }
      }
    }
  }

  # join
  fs_ext <- frm_schema %>% dplyr::left_join(extension, by = "path")

  return(fs_ext)
}

# usethis::use_test("form_schema_ext") # nolint
dbca-wa/ruODK documentation built on March 20, 2024, 12:19 p.m.