R/as_cell_df_internal.R
In tidycells: Read Tabular Data from Diverse Sources and Easily Make Them Tidy

as_cell_df_internal <- function(d, ...) {
  UseMethod("as_cell_df_internal")
}


as_cell_df_internal.default <- function(d,
                                        take_row_names = FALSE,
                                        take_col_names = FALSE) {
  if (!is.data.frame(d)) {
    abort("Data frame is expected")
  }

  if (nrow(d) < 1) {
    abort("at least 1 row is required")
  }

  if (validate_cells(d)) {
    return(new_cell_df(d))
  }

  # applicable on intermediate class "unknown"

  rcdf_possible <- is_conforms_to_rcdf(d)
  d_out <- tibble()

  if (!rcdf_possible) {
    warn_msg <- attr(rcdf_possible, "msg")
    wlvl <- attr(rcdf_possible, "lvl")
    if (length(wlvl) != 1) wlvl <- 0
    if (is.na(wlvl)) wlvl <- 0
  } else {
    warn_msg <- character(0)
    wlvl <- 0
  }

  conv_done <- FALSE
  translate <- FALSE

  if (rcdf_possible) {
    # presence of value column

    if (hasName(d, "value")) {
      if (rlang::is_atomic(d$value)) {
        d_out <- d %>% mutate(data_type = ifelse(is.numeric(d$value), "numeric", "character"))
        conv_done <- TRUE
      } else {
        warn_msg <- warn_msg %>%
          c("The data has (row, col, value) but value column is not atomic.")
      }
    } else {
      rest_cols <- colnames(d) %>% setdiff(c("row", "col", "data_type"))

      # only single column except row, col and data_type
      if (length(rest_cols) == 1) {
        rest_col <- d[[rest_cols]]

        if (rlang::is_atomic(rest_col)) {
          d_out <- d %>%
            mutate(
              value = rest_col,
              data_type = ifelse(is.numeric(value), "numeric", "character")
            )

          conv_done <- TRUE
        } else {
          warn_msg <- warn_msg %>%
            c("The data has (row, col) and a single column possibly containing values, but that column is not atomic.")
        }
      } else {
        warn_msg <- warn_msg %>%
          c("The data has (row, col). However, the column containing value is not detectable")
      }
    }

    if (!conv_done) {
      warn_msg <- warn_msg %>%
        c(paste0(
          "The data conforms minimal row col data structure. ",
          "However, the column containing value is not detectable. ",
          "Data transformation is used (check if the output is as expected)."
        )) %>%
        unique()

      wlvl <- max(wlvl, 2)

      translate <- TRUE
    }
  } else {
    translate <- TRUE
  }

  warn_msg <- unique(warn_msg)

  if (translate) {
    if (length(warn_msg) > 0 & wlvl > 1) {
      message("Data is transformed into row-col-value format. (check if the output is as expected)")
      warn(paste0(warn_msg, collapse = "\n"))
    }

    d_out <- d %>%
      unpivotr::as_cells(row_names = take_row_names, col_names = take_col_names) %>%
      attach_intermediate_class() %>%
      as_cell_df()

    conv_done <- TRUE
  }

  if (conv_done) {
    d_out$value <- as.character(d_out$value)
    new_cell_df(d_out)
  } else {
    abort("unknown error occurred")
  }
}

as_cell_df_internal.cell_df <- function(d) {
  new_cell_df(d)
}

as_cell_df_internal.tidyxl <- function(d) {
  if (!is.data.frame(d)) {
    abort("Data frame is expected")
  }

  if (nrow(d) < 1) {
    abort("at least 1 row is required")
  }

  n_sheets <- d %>%
    distinct(sheet) %>%
    nrow()

  if (n_sheets != 1) {
    abort(paste("Supplied data is detected to have data structure similar to tidyxl::xlsx_cells output.",
      "Which is already in cell_df like format. But it has multiple sheets.",
      "cell_df can be constructed for a single sheet only.",
      "You may like to split the data by sheet and then obtain cell_df for each sheets.",
      sep = "\n"
    ))
  }

  d_out <- d %>%
    filter(!is_blank) %>%
    mutate(value = case_when(
      data_type == "numeric" ~ as.character(numeric),
      data_type == "logical" ~ as.character(logical),
      data_type == "date" ~ as.character(date),
      data_type == "character" ~ as.character(character),
      TRUE ~ NA_character_
    )) %>%
    mutate(data_type = ifelse(data_type == "numeric", "numeric", "character")) %>%
    filter(!is.na(value)) %>%
    distinct(row, col, data_type, value)

  new_cell_df(d_out)
}

as_cell_df_internal.unpivotr <- function(d) {
  if (!is.data.frame(d)) {
    abort("Data frame is expected")
  }

  if (nrow(d) < 1) {
    abort("at least 1 row is required")
  }

  cols <- tibble(
    cn = c("chr", "cplx", "dbl", "fct", "int", "lgl", "ord", "date", "dttm"),
    bv = list(NA_character_, NA_complex_, NA_real_, list(), NA_integer_, NA, list(), as.Date(NA), as.POSIXct(NA))
  )

  other_types <- setdiff(d$data_type, cols$cn)
  if (length(other_types) > 0) {
    warn(paste0(
      "Internal data contains types (",
      paste0(unique(other_types), collapse = ", "),
      ") which are not yet implemented."
    ))
  }

  for (cn in seq(nrow(cols))) {
    if (!hasName(d, cols$cn[cn])) {
      d[[cols$cn[cn]]] <- cols$bv[cn]
    }
  }

  d_out <- d %>%
    mutate(value = case_when(
      data_type == "chr" ~ as.character(chr),
      data_type == "cplx" ~ as.character(cplx),
      data_type == "fct" ~ (fct %>% map(as.character) %>% map_chr(~ ifelse(length(.x), .x, NA_character_))),
      data_type == "dbl" ~ as.character(dbl),
      data_type == "int" ~ as.character(int),
      data_type == "lgl" ~ as.character(lgl),
      data_type == "ord" ~ (ord %>% map(as.character) %>% map_chr(~ ifelse(length(.x), .x, NA_character_))),
      data_type == "date" ~ as.character(date),
      data_type == "dttm" ~ as.character(dttm),
      TRUE ~ NA_character_
    )) %>%
    mutate(data_type = ifelse(data_type %in% c("cplx", "dbl", "int"), "numeric", "character")) %>%
    filter(!is.na(value)) %>%
    distinct(row, col, data_type, value)

  new_cell_df(d_out)
}

as_cell_df_internal.readr <- function(d) {
  if (!is.data.frame(d)) {
    abort("Data frame is expected")
  }

  if (nrow(d) < 1) {
    abort("at least 1 row is required")
  }

  d_out <- d %>%
    filter(data_type != "missing") %>%
    mutate(data_type = ifelse(data_type %in% c("integer", "double"), "numeric", "character")) %>%
    filter(!is.na(value)) %>%
    distinct(row, col, data_type, value)

  new_cell_df(d_out)
}

Any scripts or data that you put into this service are public.

tidycells documentation built on March 26, 2020, 7:35 p.m.

rdrr.io home R language documentation Run R code online

CRAN packages Bioconductor packages R-Forge packages GitHub packages

Note that we can't provide technical support on individual packages. You should contact the package authors for that.

tidycells
Read Tabular Data from Diverse Sources and Easily Make Them Tidy

R/as_cell_df_internal.R
In tidycells: Read Tabular Data from Diverse Sources and Easily Make Them Tidy

Defines functions as_cell_df_internal as_cell_df_internal.default as_cell_df_internal.cell_df as_cell_df_internal.tidyxl as_cell_df_internal.unpivotr as_cell_df_internal.readr

Try the tidycells package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

tidycells Read Tabular Data from Diverse Sources and Easily Make Them Tidy

R/as_cell_df_internal.R In tidycells: Read Tabular Data from Diverse Sources and Easily Make Them Tidy

Defines functions as_cell_df_internal as_cell_df_internal.default as_cell_df_internal.cell_df as_cell_df_internal.tidyxl as_cell_df_internal.unpivotr as_cell_df_internal.readr

Try the tidycells package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

tidycells
Read Tabular Data from Diverse Sources and Easily Make Them Tidy

R/as_cell_df_internal.R
In tidycells: Read Tabular Data from Diverse Sources and Easily Make Them Tidy