R/data_generation.R

Defines functions tree_to_volume_frame tag_volume_frame parent_index node_numbers tree_to_ept_data tree_to_volume_frame_new tree_to_edt_data

Documented in node_numbers parent_index tag_volume_frame tree_to_edt_data tree_to_ept_data tree_to_volume_frame tree_to_volume_frame_new

#' Convert a volume tree to a data.frame
#'
#' Extract the name, label, and volumes for each node of the
#' tree. The volumes are then expanded out creating a
#' `data_frame` with n x v rows where n is the number of
#' rows and v is the number of volumes
#' @param vol_tree The volume tree of interest
#' @param filterFun which nodes to include in the tree, defaults
#' to isLeaf, all can be included with `function(n) TRUE`
#' @return A `data_frame` with the volume tree
#' @md
#' @export
tree_to_volume_frame <-
  function(vol_tree, filterFun = isLeaf){
    max_depth <-
      vol_tree$Get("level") %>%
      max
    
    parents <-
      rep("", max_depth) %>%
      setNames(paste0("level", seq_len(max_depth))) %>%
      as.list %>%
      (dplyr::as_data_frame)

    rep_last <- function(x, len){
      xlen <- length(x)
      
      if(xlen < len)
        x <- c(x, rep(x[xlen], len - xlen))

      x
    }
    
    vf <-
      vol_tree$Get(filterFun = filterFun
                 , function(node){
                   node_attrs <-
                     data_frame(name = node$name
                              , parent =
                                  `if`(!is.null(node$parent$name)
                                     , node$parent$name
                                     , NA)
                              , gparent =
                                  `if`(!is.null(node$parent$parent$name)
                                     , node$parent$parent$name
                                     , NA)
                              , volume = list(as.numeric(node$volumes))
                              , ind = list(as.character(1:length(node$volumes)))
                              , is_leaf = isLeaf(node))

                   depth <- node$level
                   node_path <- parents
                   node_path[1,] <- rep_last(node$path, max_depth)

                   cbind(node_attrs, node_path)
                 }
               , simplify = FALSE
               , traversal = "post-order") %>%
      bind_rows %>%
      unnest

    vf
  }

#' Associate metadata with a volume tree frame
#'
#' Take a volume tree frame generated by [tree_to_volume_frame]
#' and associated metadata. The rows of metadata should correspond
#' to the order of the volumes in the original volume tree.
#'
#' @param vol_frame the volume frame
#' @param metadata the metadata frame
#' @return the tagged data frame
#' @export
tag_volume_frame <-
  function(vol_frame, metadata){
    metadata_munged <-
      metadata %>% mutate(ind = as.character(seq_len(n())))


    inner_join(vol_frame, metadata_munged, by = "ind") %>%
      group_by(name) %>%
      mutate(scaled_vol = as.numeric(scale(volume))
           , mean_vol = mean(volume)
           , sd_vol = sd(volume)) %>%
      ungroup
  }


#' Find the parent indices for each node
#'
#' Walks the tree in pre-order extracting the index
#' of the parent for each structure.
#'
#' @param tree The tree of interest
#' @return An integer vector with parent indices
#' @export
parent_index <-
  function(tree){
    names <- tree$Get("name")
    parent_name <- tree$Get(function(n) n$parent$name)

    node_ind <- seq_along(names) %>% setNames(names)
    parent_ind <- node_ind[parent_name]

    parent_ind[is.na(parent_ind)] <- 0
    names(parent_ind) <- names

    parent_ind
  }

#' Get node numbers
#'
#' Take a vector of node names and return their index according
#' to a tree walked in pre-order.
#'
#' @param x a vector of node names
#' @param tree a tree to get the index from
#' @return a vector with length x containing the node indices
#' @export
node_numbers <-
  function(x, tree){
    names <- tree$Get("name")
    inds <- seq_along(names) %>% setNames(names)

    inds[x]
  }


#' Convert a tree and metadata into useable data for ept
#' data
#'
#' This was the original incarnation, now out of date.
#'
#' @param tree The tree containing volumes
#' @param metadata The metadata necessary, requires `SEX` as
#' the covariate.
#' @return a list containing the requiste data for fitting an
#' ept model.
#' @export 
tree_to_ept_data <- function(tree, metadata, justLeaves = FALSE){
  hvf <- tree_to_volume_frame(tree, function(n) TRUE)
  hvft <- tag_volume_frame(hvf, metadata)

  leaves <- tree$Get("name", filterFun = isLeaf)
  nodes <- tree$Get("name")
  
  sds <-
    hvft %>%
    group_by(name) %>%
    summarize(sd_vol = sd_vol[1]) %>%
    with( setNames(sd_vol, name) )

  if(justLeaves){
    hvf_sub <- hvft %>% filter(is_leaf)
  } else {
    hvf_sub <- hvft
  }

  hept_data <-
  lst(N = nrow(hvf_sub)
    , P = 2
    , R = 1
    , NNodes = length(unique(hvft$name))
    , y = hvf_sub$scaled_vol
    , node_number = node_numbers(hvf_sub$name, tree)
    , node_parent = parent_index(tree)
    , parent_ind = match(node_parent, unique(node_parent))
    , NParents = length(unique(node_parent))
    , model_matrix = model.matrix(~ SEX, data = hvf_sub)
    , ranint_matrix = matrix(as.integer(as.factor(hvf_sub$ID)))
    , ranint_sizes = as.array(max(ranint_matrix))
    , Ranint_max = max(ranint_sizes)
    , ranint_shape = 1
    , lkj_shape = 1
    , tau_shape = 2
    , tau_rate = 3
    , pi_conc = 1
    , sig_model_shape = 1
      )

  list(hvft = hvf_sub
     , hept = hept_data
     , sds = sds[nodes]
       ) 
}

#' Convert a volume tree to a data.frame
#'
#' Extract the name, label, and volumes for each node of the
#' tree. The volumes are then expanded out creating a
#' `data_frame` with n x v rows where n is the number of
#' rows and v is the number of volumes
#' @param vol_tree The volume tree of interest
#' @param filterFun which nodes to include in the tree, defaults
#' to isLeaf, all can be included with `function(n) TRUE`
#' @return A `data_frame` with the volume tree
#' @md
#' @export
tree_to_volume_frame_new <-
  function(vol_tree, filterFun = isLeaf){
    max_depth <-
      vol_tree$Get("level") %>%
      max
    
    parents <-
      rep("", max_depth) %>%
      setNames(paste0("p", rev(seq_len(max_depth) - 1))) %>%
      as.list %>%
      (dplyr::as_data_frame)

    rep_last <- function(x, len){
      xlen <- length(x)
      
      if(xlen < len)
        x <- c(x, rep(x[xlen], len - xlen))

      x
    }
    
    vf <-
      vol_tree$Get(filterFun = filterFun
                 , function(node){
                   node_attrs <-
                     data_frame(name = node$name
                              , parent =
                                  `if`(!is.null(node$parent$name)
                                     , node$parent$name
                                     , NA)
                              , gparent =
                                  `if`(!is.null(node$parent$parent$name)
                                     , node$parent$parent$name
                                     , NA)
                              , volume = list(as.numeric(node$volumes))
                              , ind = list(as.character(1:length(node$volumes)))
                              , is_leaf = isLeaf(node))

                   depth <- node$level
                   node_path <- parents
                   node_path[1,] <- rep_last(node$path, max_depth)

                   cbind(node_attrs, node_path)
                 }
               , simplify = FALSE
               , traversal = "post-order") %>%
      bind_rows %>%
      mutate(bv = list(vol_tree$volumes)) %>%
      unnest

    vf
  }

#' Convert a tree and metadata into useable data for hierarchical
#' modelling
#'
#' @param tree The tree containing volumes
#' @param metadata The metadata necessary, requires `SEX` as
#' the covariate.
#' @param scale Whether or not to centre/scale the volumes
#' @param justLeaves Whether to filter down the data to
#' just the leaf volume (default true)
#' @return a list containing the requisite data for 
#' fitting either flat models or effect diffusion models.
#' @export 
tree_to_edt_data <-
  function(tree, metadata, scale = FALSE, justLeaves = TRUE
           , model_formula = ~ group
           ){
  hvf <- tree_to_volume_frame_new(tree, function(n) TRUE)
  hvft <- tag_volume_frame(hvf, metadata)

  leaves <- tree$Get("name", filterFun = isLeaf)
  nodes <- tree$Get("name")
  
  sds <-
    hvft %>%
    group_by(name) %>%
    summarize(sd_vol = sd_vol[1]) %>%
    with( setNames(sd_vol, name) )

  if(justLeaves){
    hvf_sub <- hvft %>% filter(is_leaf)
  } else {
    hvf_sub <- hvft
  }

  hept_data <-
  lst(N = nrow(hvf_sub)
    , P = 2
    , R = 1
    , NNodes = length(unique(hvft$name))
    , y = `if`(scale, hvf_sub$scaled_vol, hvf_sub$volume)
    , node_number = node_numbers(hvf_sub$name, tree)
    , node_parent = parent_index(tree)
    , parent_ind = match(node_parent, unique(node_parent))
    , NParents = length(unique(node_parent))
    , model_matrix = model.matrix(model_formula, data = hvf_sub)
    , ranint_matrix = matrix(as.integer(as.factor(hvf_sub$ID)))
    , ranint_sizes = as.array(max(ranint_matrix))
    , Ranint_max = max(ranint_sizes)
    , ranint_shape = 1
    , lkj_shape = 1
    , tau_shape = 2
    , tau_rate = 3
    , pi_conc = 1
    , sig_model_shape = 1
      )

  list(vol_frame = hvf_sub
     , stan_list = hept_data
     , sds = sds[nodes]
       ) 
}

#' @describeIn tree_to_edt_data deprecated old variant
#' @export
tree_to_ept_data_new <- tree_to_edt_data
cfhammill/hierarchyTrees documentation built on Feb. 8, 2020, 2:54 a.m.