inst/doc/principal_balances.R

## ----setup, include = FALSE---------------------------------------------------
knitr::opts_chunk$set(
  collapse = TRUE,
  comment = "#>"
)
options(digits = 4)

## -----------------------------------------------------------------------------
library(coda.base)
X = parliament2017[,-1]

## -----------------------------------------------------------------------------
B1 = pb_basis(X, method = "exact")

## ----fig.width=7.5, fig.height=4.5--------------------------------------------
plot_balance(B1)

## -----------------------------------------------------------------------------
apply(coordinates(X, B1), 2, var)

## ----echo=FALSE, fig.width=7.5, fig.height=4.5, warning=FALSE-----------------
library(ggplot2)
D = 10

# time_ = sapply(5:20, function(D){
#   print(D)
#   x = matrix(rlnorm(100*D), ncol=D)
#   a = tic()
#   r = pb_basis(x, method = 'exact')
#   b = toc()
#   b$toc - b$tic
# })
# dplot = data.frame(
#   parts = tail(5:20, 11),
#   time = tail(time_, 11)
# )

dplot = structure(list(parts = 10:20, time = c(0.00499999999999545, 0.0119999999999436, 
0.0260000000000673, 0.0749999999999318, 0.243000000000052, 0.831999999999994, 
2.64400000000001, 9.56200000000001, 28.787, 95.4560000000001, 
311.133)), class = "data.frame", row.names = c(NA, -11L))

ggplot(data=dplot) +
  geom_point(aes(x=parts, y=time)) +
  geom_segment(aes(x=parts, xend =parts, y = 0, yend=time)) +
  labs(x = 'Number of parts', y = 'Time in seconds (logarithmic scale)', title = 'Time needed to calculate Principal Balances',
       caption = "Times measured in a MacBook Pro (13-inch, 2017) \n2,3 GHz Dual-Core Intel Core i5.16 GB 2133 MHz LPDDR3") +
  scale_x_continuous(breaks = tail(5:20, 11)) +
  scale_y_continuous(trans = 'log', breaks = c(0.004, 0.04, 0.4, 4, 40, 400), labels = c("0.004", "0.04", "0.4", 4, 40, 400)) +
  theme_minimal()

## -----------------------------------------------------------------------------
D = as.dist(variation_array(X))
D

## ----fig.width=7.5, fig.height=4.5--------------------------------------------
B2 = pb_basis(X, method = 'cluster')
plot_balance(B2)

## -----------------------------------------------------------------------------
apply(coordinates(X, B2), 2, var)

## ----fig.width=7.5, fig.height=4.5--------------------------------------------
B3 = pb_basis(X, method = 'constrained')
plot_balance(B3)

## -----------------------------------------------------------------------------
apply(coordinates(X, B3), 2, var)

## ----eval=FALSE, echo=FALSE---------------------------------------------------
#  hc = hclust_dendrogram(B1)
#  hcd = as.dendrogram(hc)
#  dd = dendro_data(hcd)
#  ggdendrogram(dd)
#  dd$segments = dd$segments %>%
#    mutate(
#      end_node = yend == 0
#    )
#  
#  p <- ggplot(dd$segments) +
#    geom_segment(aes(x = x, y = y, xend = xend, yend = yend, linetype = end_node))+
#    geom_label(data = dd$labels, aes(x, y, label = label),
#              hjust = 0.5, angle = 90, size = 4) +
#    theme_void() + scale_linetype_discrete(guide=FALSE)
#  p
#  
#  ## Build tree
#  build_tree_order = function(B, ibalance){
#    balance = B[,ibalance]
#    if(sum(balance != 0) == 2){
#      return(ibalance)
#    }
#    L = NULL; R = NULL
#    left_ = balance < 0
#    if(sum(left_) > 1){
#      L = Recall(B, which(apply((B != 0 & left_) | (B == 0 & !left_), 2, all)))
#    }
#    right_ = balance > 0
#    if(sum(right_) > 1){
#      R = Recall(B, which(apply((B != 0 & right_) | (B == 0 & !right_), 2, all)))
#    }
#    return(unname(c(L, R, ibalance)))
#  }
#  ord_ = build_tree_order(B, which(apply(B != 0, 2, all)))
#  TREE = setNames(lapply(1:ncol(X), function(i) list('c' = i)), names(X)[ORDER])
#  for(i in ord_){
#    balance = B[,i]
#    name_ = paste(sort(names(balance)[balance != 0]), collapse = '_')
#    name_left = paste(sort(names(balance)[balance < 0]), collapse = '_')
#    name_right = paste(sort(names(balance)[balance > 0]), collapse = '_')
#    l_node = list(
#      list(
#        name = name_,
#        left = name_left,
#        right = name_right,
#        l = TREE[[name_left]]$c,
#        r = TREE[[name_right]]$c,
#        c = (TREE[[name_left]]$c+TREE[[name_right]]$c)/2
#    ))
#    names(l_node) = name_
#    TREE = c(TREE, l_node)
#  }
#  nodes = sapply(ord_, function(i){
#    balance = B[,i]
#    paste(sort(names(balance)[balance != 0]), collapse = '_')
#  })
#  TREE = TREE[nodes]
#  lapply(TREE, as_tibble) %>% bind_rows()
#  for(node in rev(nodes)){
#    TREE[[node]]
#  }
#  
#  # lapply(nodes, function(name_){
#  #   tibble(
#  #     name = name_,
#  #     name_l = TREE[[name_]]$left,
#  #     name_r = TREE[[name_]]$right,
#  #     l = TREE[[name_]]$l,
#  #     r = TREE[[name_]]$r,
#  #     c = TREE[[name_]]$c,
#  #     var = var(H[,i])
#  #   )
#  # }) %>% bind_rows()
#  
#  
#  H = coordinates(X, B)
#  H_mean = colMeans(H)
#  H_var = apply(H, 2, var)
#  
#  names(X)[ORDER]
#  
#  l_nodes = lapply(rev(ord_), function(i){
#    list(
#  
#      balance = B[,i],
#      mean = boot::inv.logit(H_mean[i]),
#      var = H_var[i]
#    )
#  })
#  names(l_nodes) = sapply(l_nodes, function(x) paste(sort(names(x$balance)[x$balance != 0]), collapse = '_'))
#  l_nodes

## ----eval=FALSE, include=FALSE------------------------------------------------
#  library(coda.base)
#  X = iris[,1:4]
#  B = pb_basis(X, method = 'exact')
#  rownames(B) = names(X)
#  H = coordinates(X, B)
#  apply(H, 2, var)

## ----eval = FALSE, include=FALSE----------------------------------------------
#  plot(H[,1], H[,2])

## ----eval=FALSE, include=FALSE------------------------------------------------
#  lX = split(X, iris$Species)
#  m_ = lapply(lX, colMeans)
#  s_ = lapply(lX, cov)
#  S = cov(X)
#  S_ = replicate(3, S, simplify = FALSE)

## ----eval=FALSE, include=FALSE------------------------------------------------
#  Prob = mapply(function(m,s){
#    mvtnorm::dmvnorm(X, m, s)
#  }, m_, S_)

## ----eval=FALSE, include=FALSE------------------------------------------------
#  B_1 = pb_basis(Prob, method = 'exact')
#  H_1 = coordinates(Prob, B_1)
#  plot(H_1, col = iris$Species)

## ----eval=FALSE, include=FALSE------------------------------------------------
#  B_2 = pc_basis(Prob)
#  H_2 = coordinates(Prob, B_2)
#  plot(-H_2, col = iris$Species)

## ----eval=FALSE, include=FALSE------------------------------------------------
#  library(fpc)
#  dp = discrproj(X, iris$Species)
#  plot(dp$proj[,1:2], col = iris$Species)

## ----eval=FALSE, echo=FALSE---------------------------------------------------
#  @article{cite-key,
#  	Date-Added = {2020-06-13 08:44:39 +0000},
#  	Date-Modified = {2020-06-13 08:44:39 +0000},
#  	Id = {ref25},
#  	Title = {Pawlowsky-Glahn V, Egozcue JJ, Tolosana-Delgado R (2011) Principal balances. In Egozcue JJ, Tolosana-Delgado R, Ortego M (eds) Proceedings of the 4th international workshop on compositional data analysis, Girona, Spain, pp 1--10},
#  	Ty = {STD}}
#  
#  @article{cite-key,
#  	Author = {Mart{\'\i}n-Fern{\'a}ndez, J. A. and Pawlowsky-Glahn, V. and Egozcue, J. J. and Tolosona-Delgado, R.},
#  	Da = {2018/04/01},
#  	Date-Added = {2020-06-13 08:42:47 +0000},
#  	Date-Modified = {2020-06-13 08:42:47 +0000},
#  	Doi = {10.1007/s11004-017-9712-z},
#  	Id = {Mart{\'\i}n-Fern{\'a}ndez2018},
#  	Isbn = {1874-8953},
#  	Journal = {Mathematical Geosciences},
#  	Number = {3},
#  	Pages = {273--298},
#  	Title = {Advances in Principal Balances for Compositional Data},
#  	Ty = {JOUR},
#  	Url = {https://doi.org/10.1007/s11004-017-9712-z},
#  	Volume = {50},
#  	Year = {2018},
#  	Bdsk-Url-1 = {https://doi.org/10.1007/s11004-017-9712-z}}
#  
#  Frank Ruskey LEcture note computer science 762 (1993) 205-206
#  @String{j-LECT-NOTES-COMP-SCI   = "Lecture Notes in Computer Science"}
#  @String{ack-nhfb = "Nelson H. F. Beebe,
#                      University of Utah,
#                      Department of Mathematics, 110 LCB,
#                      155 S 1400 E RM 233,
#                      Salt Lake City, UT 84112-0090, USA,
#                      Tel: +1 801 581 5254,
#                      FAX: +1 801 581 4148,
#                      e-mail: \path|beebe@math.utah.edu|,
#                              \path|beebe@acm.org|,
#                              \path|beebe@computer.org| (Internet),
#                      URL: \path|http://www.math.utah.edu/~beebe/|"}
#  @Article{Ruskey:1993:SCG,
#    author =       "F. Ruskey",
#    title =        "Simple Combinatorial Gray Codes Constructed by
#                   Reversing Sublists",
#    journal =      j-LECT-NOTES-COMP-SCI,
#    volume =       "762",
#    pages =        "201--208",
#    year =         "1993",
#    CODEN =        "LNCSD9",
#    ISSN =         "0302-9743 (print), 1611-3349 (electronic)",
#    ISSN-L =       "0302-9743",
#    bibdate =      "Wed Sep 15 10:01:31 MDT 1999",
#    bibsource =    "http://www.math.utah.edu/pub/tex/bib/lncs1993.bib",
#    acknowledgement = ack-nhfb,
#    keywords =     "algorithms; computation; ISAAC",
#  }

Try the coda.base package in your browser

Any scripts or data that you put into this service are public.

coda.base documentation built on Nov. 26, 2023, 1:07 a.m.