R/feat_reconstruction.R

Defines functions reconstruct.abundance.tbl check.tbl.dim

Documented in check.tbl.dim reconstruct.abundance.tbl

#'@title Helper function to check if nested tibble is labeled or not
#'@description TBD
#'@param df.unpacked a data frame with nested list-columns
#'@return integer
#'@importFrom magrittr use_series
#'@importFrom purrr pluck
check.tbl.dim <- function(df.unpacked) {
  df.unpacked %>%
    magrittr::use_series(data) %>%
    purrr::pluck(1) %>%
    dim %>%
    purrr::pluck(2)
}


#'@title Reconstruct feature abundance table from tidy format
#'@description TBD
#'@param df.unpacked a data frame with nested list-columns
#'@param s.var a string specifying a stratification variable
#'@param b.var the other variable of interest, default = 'abundance'
#'@return a tibble with feature + nested data column comprising a single
#'     feature abundance table
#'@importFrom dplyr select_if bind_cols
#'@importFrom magrittr set_colnames
#'@importFrom purrr map_dfc
#'@importFrom tibble add_column
#'@export
reconstruct.abundance.tbl <- function(df.unpacked, original = FALSE,
                                      split = NULL,
                                      x.var = 'abundance', y.var, z.var) {

  # browser()
  if (original) {
    df.unpacked %>%
      use_series(data) %>%
      purrr::map_dfc(~ .$abundance) %>%
      magrittr::set_colnames(df.unpacked$feature) %>%
      tibble::add_column(!!z.var := get(z.var, df.unpacked$data[[1]]),
                         .before = 1) %>%
      tibble::add_column(!!y.var := get(y.var, df.unpacked$data[[1]]),
                         .before = 1)

  } else {
    df.split <- df.unpacked %>%
      select('feature','null','conf','bio') %>%
      # get desired split
      mutate(null = map(null, ~ (.) %>%
                          use_series(data) %>%
                          extract2(split))) %>%
      mutate(conf = map(conf, ~ (.) %>%
                          use_series(data) %>%
                          extract2(split))) %>%
      mutate(bio = map(bio, ~ (.) %>%
                          use_series(data) %>%
                          extract2(split)))

    # return three reconstructed data frames - one for each DAG
    df.null <- df.split %>%
      unpack.dag.column('null', y.var, z.var)

    df.conf <- df.split %>%
      unpack.dag.column('conf', y.var, z.var)

    df.bio <- df.split %>%
      unpack.dag.column('bio', y.var, z.var)

    return(list('null' = df.null,
                'conf' = df.conf,
                'bio' = df.bio))
  }
}

unpack.dag.column <- function(df.split, dag, y.var, z.var) {

  df.split %>%
    select(c('feature', dag)) %>%
    unnest(!!rlang::sym(dag)) %>%
    group_by(!!!rlang::syms(c(y.var, z.var))) %>%
    nest() %>%
    mutate(data = map(data, function(df) {
      # browser()
      df %>%
        group_by(feature) %>%
        nest() %>%
        ungroup() %>%
        mutate(data = map(data, ~ rowid_to_column(.))) %>%
        unnest_auto(data) %>%
        unnest(cols = c('rowid','abundance')) %>%
        rename(SampleID = 'rowid') })) %>%
    ungroup() %>%
    unnest(data) %>%
    pivot_wider(names_from = 'feature', values_from = 'abundance') %>%
    select(-SampleID) %>%
    ungroup()
}


#'@title Unpack list-columns of simulated data
#'@description TBD
#'@param df.packed a data frame with nested list-columns
#'@param s.var a string specifying a stratification variable
#'@param idx an integer specifying the index of the simulation number
#'@return a tibble with feature + nested data column comprising a single
#'     feature abundance table
#'@importFrom dplyr mutate_if
#'@importFrom magrittr use_series extract2
#'@importFrom purrr map
unpack.data.idx <- function(df.packed, s.var, idx, stratified = TRUE) {

  if (stratified) {
    df.packed %>%
      dplyr::mutate_if(is.list, ~ purrr::map(., function(x) {
        x %>%
          magrittr::use_series(data) %>%
          magrittr::extract2(idx) %>%
          dplyr::arrange(get(s.var))}))}

  else {
    df.packed %>%
      dplyr::mutate_if(is.list, ~ purrr::map(., function(x) {
        x %>%
          magrittr::use_series(data) %>%
          magrittr::extract2(idx) %>%
          magrittr::use_series(abundance)}))} #x$data[[idx]]$abundance}))}
}
sxmorgan/ansimo documentation built on June 26, 2020, 7:59 p.m.