countdata.R
In countdata: The Beta-Binomial Test for Count Data

## ---- include = FALSE---------------------------------------------------------
knitr::opts_chunk$set(
  collapse = TRUE,
  eval = FALSE,
  comment = "#>"
)

## ----setup--------------------------------------------------------------------
#  library(countdata)

## -----------------------------------------------------------------------------
#  d <- read.delim("https://tvpham.github.io/data/example-3groups.txt")
#  
#  head(d)
#  #>    a1  a2  a3  b1  b2  b3  c1  c2
#  #> 1 624 496 509 414 394 375 325 288
#  #> 2 615 854 930 341 523 360 359 329
#  #> 3 553 560 745 819 490 481 480 500
#  #> 4 525 412 401 354 321 310 258 228
#  #> 5 484 284 315 268 282 307 270 298
#  #> 6 482 348 400 242 365 367  81 118
#  
#  # compare the first 3 samples against the next three samples
#  out <- countdata::bb.test(d[, 1:6],
#                            colSums(d[, 1:6]),
#                            c(rep("a", 3), rep("b", 3)))
#  #> Using 11 thread(s) ...
#  #> No. of data rows = 1786, no. of groups = 2, no. of samples = 6...
#  #> 5%
#  #> 11%
#  #> 18%
#  #> 23%
#  #> 30%
#  #> 35%
#  #> 41%
#  #> 49%
#  #> 55%
#  #> 64%
#  #> 81%
#  #> 87%
#  #> 93%
#  #> 98%
#  #> Done.
#  
#  d.norm <-  countdata::normalize(d[, 1:8])
#  
#  write.table(cbind(d, d.norm,
#                    fc = countdata::fold.change(d.norm[, 1:3], d.norm[, 4:6]),
#                    pval = out$p.value,
#                    pval.BH = p.adjust(out$p.value, method = "BH")),
#              file = "output.txt", row.names = FALSE, sep = "\t")

## -----------------------------------------------------------------------------
#  d <- read.delim("https://tvpham.github.io/data/example-3groups.txt")
#  
#  head(d)
#  #>    a1  a2  a3  b1  b2  b3  c1  c2
#  #> 1 624 496 509 414 394 375 325 288
#  #> 2 615 854 930 341 523 360 359 329
#  #> 3 553 560 745 819 490 481 480 500
#  #> 4 525 412 401 354 321 310 258 228
#  #> 5 484 284 315 268 282 307 270 298
#  #> 6 482 348 400 242 365 367  81 118
#  
#  # compare the first 3 samples, the next three samples, and the last two samples.
#  out <- countdata::bb.test(d[, 1:8],
#                            colSums(d[, 1:8]),
#                            c(rep("a", 3), rep("b", 3), rep("c", 2)))
#  #> Using 11 thread(s) ...
#  #> No. of data rows = 1786, no. of groups = 3, no. of samples = 8...
#  #> 0%
#  #> 5%
#  #> 10%
#  #> 16%
#  #> 23%
#  #> 33%
#  #> 40%
#  #> 45%
#  #> 51%
#  #> 58%
#  #> 63%
#  #> 70%
#  #> 76%
#  #> 81%
#  #> 87%
#  #> 93%
#  #> 98%
#  #> Done.
#  
#  d.norm <-  countdata::normalize(d[, 1:8])
#  
#  write.table(cbind(d, d.norm,
#                    pval = out$p.value,
#                    pval.BH = p.adjust(out$p.value, method = "BH")),
#              file = "output.txt", row.names = FALSE, sep = "\t")

## -----------------------------------------------------------------------------
#  d <- read.delim("https://tvpham.github.io/data/example-paired.txt")
#  
#  head(d)
#  #>   pre.1 pre.2 pre.3 post.1 post.2 post.3
#  #> 1   575   179   335    505    172    204
#  #> 2   294   245   256    396    390    265
#  #> 3   293   282   320    372    240    204
#  #> 4   303   282   250    307    243    227
#  #> 5   396   271   171    327    216    103
#  #> 6   238   261   271    245    234    215
#  
#  out <- countdata::ibb.test(d[, 1:6],
#                             colSums(d[, 1:6]),
#                             c(rep("pre_treatment", 3), rep("post_treatment", 3)))
#  #> Using 11 thread(s) ...
#  #> No. of data rows = 2919, no. of pair(s) = 3...
#  #> 0%
#  #> 5%
#  #> 11%
#  #> 16%
#  #> 21%
#  #> 26%
#  #> 31%
#  #> 36%
#  #> 41%
#  #> 46%
#  #> 51%
#  #> 57%
#  #> 63%
#  #> 68%
#  #> 73%
#  #> 78%
#  #> 83%
#  #> 88%
#  #> 94%
#  #> 99%
#  #> Done.
#  
#  d.norm <-  countdata::normalize(d[, 1:6])
#  
#  write.table(cbind(d, d.norm,
#                    fc = out$fc,
#                    pval = out$p.value,
#                    pval.BH = p.adjust(out$p.value, method = "BH")),
#              file = "output.txt", row.names = FALSE, sep = "\t")
#  

## -----------------------------------------------------------------------------
#  x <- c(1, 5, 1, 10, 9, 11, 2, 8)
#  
#  tx <- c(19609, 19053, 19235, 19374, 18868, 19018, 18844, 19271)
#  
#  group <- c(rep("cancer", 3), rep("normal", 5))
#  
#  countdata::bb.test(x, tx, group)
#  #> Using a single CPU core ...
#  #> No. of data rows = 1, no. of groups = 2, no. of samples = 8...
#  #> 100%
#  #> Done.
#  #> $p.value
#  #> [1] 0.01568598

## -----------------------------------------------------------------------------
#  d <- read.delim("https://tvpham.github.io/data/example-3groups.txt")
#  
#  # compare 3 groups, using all available CPU cores
#  out <- countdata::bb.test(d[, 1:8],
#                            colSums(d[, 1:8]),
#                            c(rep("a", 3), rep("b", 3), rep("c", 2)))
#  #> Using 11 thread(s) ...
#  #> No. of data rows = 1786, no. of groups = 3, no. of samples = 8...
#  #> 0%
#  #> 5%
#  #> 11%
#  #> 16%
#  #> 21%
#  #> 28%
#  #> 34%
#  #> 40%
#  #> 46%
#  #> 52%
#  #> 58%
#  #> 77%
#  #> 83%
#  #> 88%
#  #> 93%
#  #> 98%
#  #> Done.

## -----------------------------------------------------------------------------
#  pval.BH <- p.adjust(out$p.value, method = "BH")

## -----------------------------------------------------------------------------
#  d.norm <-  countdata::normalize(d[, 1:8])
#  
#  # check -- all values should be equal
#  colSums(d.norm)
#  #>    a1    a2    a3    b1    b2    b3    c1    c2
#  #> 19159 19159 19159 19159 19159 19159 19159 19159

## -----------------------------------------------------------------------------
#  head(cbind(d.norm, out$p.value))
#  #>         a1       a2       a3       b1       b2       b3        c1       c2
#  #> 1 609.6800 498.7595 506.9889 409.4057 400.0766 377.7803 330.43276 286.3262
#  #> 2 600.8866 858.7512 926.3254 337.2158 531.0662 362.6691 365.00111 327.0879
#  #> 3 540.3094 563.1155 742.0564 809.9113 497.5572 484.5661 488.02377 497.0941
#  #> 4 512.9520 414.2921 399.4156 350.0715 325.9508 312.2983 262.31278 226.6749
#  #> 5 472.8929 285.5800 313.7554 265.0259 286.3493 309.2761 274.51337 296.2681
#  #> 6 470.9388 349.9361 398.4195 239.3144 370.6294 369.7209  82.35401 117.3142
#  #>    out$p.value
#  #> 1 0.0001164498
#  #> 2 0.0016150240
#  #> 3 0.4501729545
#  #> 4 0.0003105657
#  #> 5 0.2521494212
#  #> 6 0.0001057092

## -----------------------------------------------------------------------------
#  fc12 <- countdata::fold.change(d.norm[, 1:3], d.norm[, 4:6])
#  fc13 <- countdata::fold.change(d.norm[, 1:3], d.norm[, 7:8])
#  fc23 <- countdata::fold.change(d.norm[, 4:6], d.norm[, 7:8], BIG = 100)
#  
#  head(cbind(d.norm, out$p.value, fc12, fc13, fc23))
#  #>         a1       a2       a3       b1       b2       b3        c1       c2
#  #> 1 609.6800 498.7595 506.9889 409.4057 400.0766 377.7803 330.43276 286.3262
#  #> 2 600.8866 858.7512 926.3254 337.2158 531.0662 362.6691 365.00111 327.0879
#  #> 3 540.3094 563.1155 742.0564 809.9113 497.5572 484.5661 488.02377 497.0941
#  #> 4 512.9520 414.2921 399.4156 350.0715 325.9508 312.2983 262.31278 226.6749
#  #> 5 472.8929 285.5800 313.7554 265.0259 286.3493 309.2761 274.51337 296.2681
#  #> 6 470.9388 349.9361 398.4195 239.3144 370.6294 369.7209  82.35401 117.3142
#  #>    out$p.value      fc12      fc13      fc23
#  #> 1 0.0001164498 -1.360633 -1.746148 -1.283335
#  #> 2 0.0016150240 -1.938309 -2.298320 -1.185735
#  #> 3 0.4501729545 -1.029825 -1.248907 -1.212738
#  #> 4 0.0003105657 -1.342337 -1.808716 -1.347438
#  #> 5 0.2521494212 -1.245834 -1.252351 -1.005232
#  #> 6 0.0001057092 -1.244604 -4.071068 -3.270976

## -----------------------------------------------------------------------------
#  x <- c(33, 32, 86, 51, 52, 149)
#  
#  tx <- c(7742608, 15581382, 20933491, 7126839, 13842297, 14760103)
#  
#  group <- c(rep("cancer", 3), rep("normal", 3))
#  
#  countdata::ibb.test(x, tx, group)
#  #> Using a single CPU core ...
#  #> No. of data rows = 1, no. of pair(s) = 3...
#  #> 100%
#  #> Done.
#  #> $p.value
#  #> [1] 0.004103636
#  #>
#  #> $fc
#  #> [1] 2.137632

## -----------------------------------------------------------------------------
#  d <- read.delim("https://tvpham.github.io/data/example-paired.txt")
#  
#  # perform a paired test for all rows
#  out <- countdata::ibb.test(d[, 1:6],
#                             colSums(d[, 1:6]),
#                             c(rep("pre_treatment", 3), rep("post_treatment", 3)))
#  #> Using 11 thread(s) ...
#  #> No. of data rows = 2919, no. of pair(s) = 3...
#  #> 0%
#  #> 5%
#  #> 10%
#  #> 16%
#  #> 21%
#  #> 26%
#  #> 31%
#  #> 37%
#  #> 42%
#  #> 47%
#  #> 52%
#  #> 57%
#  #> 62%
#  #> 67%
#  #> 73%
#  #> 78%
#  #> 83%
#  #> 88%
#  #> 93%
#  #> 98%
#  #> Done.

## -----------------------------------------------------------------------------
#  d.norm <- countdata::normalize(d[, 1:6])
#  
#  head(cbind(d.norm, out$p.value, out$fc))
#  #>      pre.1    pre.2    pre.3   post.1   post.2   post.3 out$p.value    out$fc
#  #> 1 594.2861 176.8823 347.1786 490.0689 164.2960 208.5462 0.064856368 -1.297663
#  #> 2 303.8611 242.1015 265.3067 384.2916 372.5316 270.9056 0.072525346  1.259570
#  #> 3 302.8275 278.6637 331.6333 361.0012 229.2502 208.5462 0.334902070 -1.168634
#  #> 4 313.1629 278.6637 259.0885 297.9231 232.1159 232.0588 0.045979358 -1.117328
#  #> 5 409.2823 267.7939 177.2166 317.3317 206.3252 105.2954 0.006665643 -1.356184
#  #> 6 245.9828 257.9122 280.8520 237.7562 223.5190 219.7913 0.048216659 -1.151104