R/spec_guess_object_list.R

Defines functions is_field_row is_field_scalar get_required guess_object_list_field_spec guess_object_list_spec guess_tspec_object_list

guess_tspec_object_list <- function(x,
                                   ...,
                                   empty_list_unspecified = FALSE,
                                   simplify_list = FALSE,
                                   call = current_call()) {
  check_dots_empty()
  withr::local_options(list(tibblify.used_empty_list_arg = NULL))
  if (is.data.frame(x)) {
    msg <- c(
      "{.arg x} must not be a dataframe.",
      i = "Did you want to use {.fn guess_tspec_df} instead?"
    )
    cli::cli_abort(msg, call = call)
  }

  if (!is.list(x)) {
    cls <- class(x)[[1]]
    msg <- "{.arg x} must be a list. Instead, it is a {.cls {cls}}."
    cli::cli_abort(msg, call = call)
  }

  fields <- guess_object_list_spec(
    x,
    empty_list_unspecified = empty_list_unspecified,
    simplify_list = simplify_list
  )

  names_to <- NULL
  if (is_named(x)) {
    names_to <- ".names"
  }

  tspec_df(
    !!!fields,
    .names_to = names_to,
    vector_allows_empty_list = is_true(getOption("tibblify.used_empty_list_arg"))
  )
}

guess_object_list_spec <- function(x,
                                   empty_list_unspecified,
                                   simplify_list) {
  required <- get_required(x)

  # need to remove empty elements for `purrr::transpose()` to work...
  x <- vctrs::list_drop_empty(x)

  x_t <- purrr::transpose(unname(x), names(required))

  purrr::pmap(
    tibble::tibble(
      value = x_t,
      name = names(required),
      required = unname(required)
    ),
    guess_object_list_field_spec,
    empty_list_unspecified = empty_list_unspecified,
    simplify_list = simplify_list
  )
}

guess_object_list_field_spec <- function(value,
                                         name,
                                         required,
                                         empty_list_unspecified,
                                         simplify_list) {
  ptype_result <- get_ptype_common(value, empty_list_unspecified)

  # no common ptype can be one of two reasons:
  # * it contains non-vector elements
  # * it contains incompatible types
  # in both cases `tib_variant()` is used
  if (!ptype_result$has_common_ptype) {
    return(tib_variant(name, required = required))
  }

  # now we know that every element essentially has type `ptype`
  ptype <- ptype_result$ptype
  if (is_null(ptype)) {
    return(tib_unspecified(name, required = required))
  }

  ptype_type <- tib_type_of(ptype, name, other = FALSE)
  if (ptype_type == "vector") {
    if (is_field_scalar(value)) {
      return(tib_scalar(name, ptype, required = required))
    } else {
      mark_empty_list_argument(is_true(ptype_result$had_empty_lists))
      return(tib_vector(name, ptype, required = required))
    }
  }

  if (ptype_type == "df") {
    # TODO should this actually be supported?
    # TODO fix error call?
    cli::cli_abort("a list of dataframes is not yet supported")
  }

  if (ptype_type != "list") {
    cli::cli_abort("{.fn tib_type_of} returned an unexpected type", .internal = TRUE)
  }

  value_flat <- vec_flatten(value, ptype, name_spec = NULL)
  if (is_object_list(value_flat)) {
    spec <- guess_make_tib_df(
      name,
      values_flat = value_flat,
      required = required,
      empty_list_unspecified = empty_list_unspecified,
      simplify_list = simplify_list
    )
    return(spec)
  }

  if (is_field_row(value)) {
    fields <- guess_object_list_spec(
      value,
      empty_list_unspecified = empty_list_unspecified,
      simplify_list = simplify_list
    )
    return(maybe_tib_row(name, fields, required))
  }

  ptype_result <- get_ptype_common(value_flat, empty_list_unspecified)
  if (!ptype_result$has_common_ptype) {
    return(tib_variant(name, required = required))
  }

  ptype <- ptype_result$ptype
  if (is_null(ptype)) {
    return(tib_unspecified(name, required = required))
  }
  if (identical(ptype, list()) || identical(ptype, set_names(list()))) {
    return(tib_unspecified(name, required = required))
  }

  if (!simplify_list) {
    return(tib_variant(name, required = required))
  }

  if (is_field_scalar(value_flat)) {
    if (is_named(value_flat)) {
      return(tib_vector(name, ptype, required = required, input_form = "object"))
    } else {
      return(tib_vector(name, ptype, required = required, input_form = "scalar_list"))
    }
  }

  tib_variant(name, required = required)
}

get_required <- function(x, sample_size = 10e3) {
  n <- vec_size(x)
  x <- unname(x)
  if (n > sample_size) {
    n <- sample_size
    x <- vec_slice(x, sample(n, sample_size))
  }

  all_names <- vec_c(!!!lapply(x, names), .ptype = character())
  names_count <- vec_count(all_names, "location")

  empty_loc <- lengths(x) == 0L
  if (any(empty_loc)) {
    rep_named(names_count$key, FALSE)
  } else {
    set_names(names_count$count == n, names_count$key)
  }
}

is_field_scalar <- function(value) {
  sizes <- list_sizes(value)
  if (any(sizes > 1)) {
    return(FALSE)
  }

  size_0_is_null <- vec_detect_missing(value[sizes == 0])
  if (all(size_0_is_null)) {
    return(TRUE)
  }

  FALSE
}

is_field_row <- function(value) {
  is_object_list(value)
}

Try the tibblify package in your browser

Any scripts or data that you put into this service are public.

tibblify documentation built on Nov. 16, 2022, 5:07 p.m.