inst/doc/Discrete-Trait-Vignette.R

## ----eval=FALSE, warning=FALSE, message=FALSE---------------------------------
# library(dowser)
# 
# # load example AIRR tsv data
# data(ExampleAirr)
# 
# # trait value of interest
# trait="biopsy"
# 
# # Process example data using default settings
# clones = formatClones(ExampleAirr,
#     traits=trait, num_fields="duplicate_count", minseq=3)
# 
# # Column shows which biopsy the B cell was obtained from
# print(table(ExampleAirr[[trait]]))
# #Lung Nose
# # 145  244
# 
# # Calculate number of tissues sampled in tree
# tissue_types = unlist(lapply(clones$data, function(x)
#   length(unique(x@data[[trait]]))))
# 
# # Filter to multi-type trees
# clones = clones[tissue_types > 1,]
# 
# # Build trees using maximum likelihood (can use alternative builds if desired)
# trees = getTrees(clones, build="pml")
# 

## ----eval=FALSE, warning=FALSE, message=FALSE---------------------------------
# # the location of the igphyml executable
# # this is location in Docker image, will likely be different if you've set it up yourself
# # note this is the location of the compiled executable file, not just the source folder
# igphyml_location = "/usr/local/share/igphyml/src/igphyml"
# 
# # build trees as before, but use IgPhyML to reconstruct the states of internal
# # nodes using maximum parsimony
# trees = getTrees(clones, build="pml", trait=trait, igphyml=igphyml_location)
# 
# # show internal node (edge) predictions based on maximum parsimony
# plotTrees(trees, tips=trait, nodes=TRUE, palette="Set1")[[1]]

## ----eval=TRUE, echo=FALSE, warning=FALSE, message=FALSE----------------------
library(dowser)
# Load data instead of running phylip
data(BiopsyTrees)
plotTrees(BiopsyTrees, tips="biopsy", nodes=TRUE, palette="Set1")[[1]]

## ----eval=FALSE, warning=FALSE, message=FALSE---------------------------------
# # the location of the igphyml executable
# # this is location in Docker image, will likely be different if you've set it up yourself
# # note this is the location of the compiled executable file, not just the source folder
# igphyml_location = "/usr/local/share/igphyml/src/igphyml"
# 
# # calculate switches along trees compared to 100 random permutations
# # this may take a while, and can be parallelized using nproc
# switches = findSwitches(trees, permutations=100, trait=trait,
#   igphyml=igphyml_location, fixtrees=TRUE)
# 
# # Perform PS test on switches
# ps = testPS(switches$switches)
# print(ps$means)
# # A tibble: 1 x 6
# #  RECON PERMUTE   PLT DELTA STAT   REPS
# #  <dbl>   <dbl> <dbl> <dbl> <chr> <int>
# #1     8     8.6   0.4  -0.6 PS      100
# 
# 
# sp = testSP(switches$switches, alternative="greater")
# print(sp$means)
# # A tibble: 2 x 8
# # Groups:   FROM [2]
# #  FROM  TO    RECON PERMUTE   PGT  DELTA STAT   REPS
# #  <chr> <chr> <dbl>   <dbl> <dbl>  <dbl> <chr> <int>
# #1 Lung  Nose  0.131   0.323     1 -0.192 SP      100
# #2 Nose  Lung  0.869   0.677     0  0.192 SP      100
# 

## ----eval=FALSE, warning=FALSE, message=FALSE---------------------------------
# # calculate switches along bootstrap distribution of trees
# # build using the 'pml' maximum likelihood option
# # in a real analysis it's important to use at least 100 permutations
# switches = findSwitches(trees, permutations=10, trait=trait,
#   igphyml=igphyml_location, fixtrees=FALSE, build="pml")
# 
# sp = testSP(switches$switches, alternative="greater")
# print(sp$means)
# # A tibble: 2 x 8
# # Groups:   FROM [2]
# #  FROM  TO    RECON PERMUTE   PGT  DELTA STAT   REPS
# #  <chr> <chr> <dbl>   <dbl> <dbl>  <dbl> <chr> <int>
# #1 Lung  Nose  0.168   0.358   1   -0.190 SP       10
# #2 Nose  Lung  0.832   0.642   0.1  0.190 SP       10
# 

## ----eval=FALSE, warning=FALSE, message=FALSE---------------------------------
# sp = testSP(switches$switches, alternative="greater", permuteAll=TRUE)
# print(sp$means)
# # A tibble: 2 x 8
# # Groups:   FROM [2]
# #  FROM  TO    RECON PERMUTE   PGT   DELTA STAT   REPS
# #  <chr> <chr> <dbl>   <dbl> <dbl>   <dbl> <chr> <int>
# #1 Lung  Nose  0.168   0.241   0.6 -0.0736 SP       10
# #2 Nose  Lung  0.832   0.759   0.4  0.0736 SP       10

## ----eval=FALSE, warning=FALSE, message=FALSE---------------------------------
# # Downsample each tree to a tip-to-switch ratio of 10 instead of 20
# # this will reduce the false positive rate but also (likely) power
# switches = findSwitches(trees, permutations=100, trait=trait,
#   igphyml=igphyml_location, fixtrees=TRUE, tip_switch=10)
# 
# # didn't have much effect for this dataset
# sp = testSP(switches$switches, alternative="greater")
# print(sp$means)
# # A tibble: 2 x 8
# # Groups:   FROM [2]
# #  FROM  TO    RECON PERMUTE   PGT  DELTA STAT   REPS
# #  <chr> <chr> <dbl>   <dbl> <dbl>  <dbl> <chr> <int>
# #1 Lung  Nose  0.168   0.358   1   -0.190 SP       10
# #2 Nose  Lung  0.832   0.642   0.1  0.190 SP       10
# 

## ----eval=FALSE, warning=FALSE, message=FALSE---------------------------------
# # the location of the igphyml executable
# # this is location in Docker image, will likely be different if you've set it up yourself
# # note this is the location of the compiled executable file, not just the source folder
# igphyml_location = "/usr/local/share/igphyml/src/igphyml"
# 
# # constant region column name
# trait = "c_call"
# 
# # Vector of human isotypes in the proper order. Isotype switching
# # can only go from left to right (e.g. IGHM to IGHA1). One exception
# # is IGHD, which can switch to IGHM.
# isotypes = c("IGHM","IGHD","IGHG3","IGHG1","IGHA1","IGHG2",
#   "IGHG4","IGHE","IGHA2")
# 
# # Process example data using default settings with "c_call" as a trait value
# clones = formatClones(ExampleAirr, traits=trait, minseq=3)
# 
# # Column shows which constant region associated with a BCR
# print(table(ExampleAirr[[trait]]))
# # IGHA1 IGHA2  IGHD IGHG1 IGHG2 IGHG3 IGHG4  IGHM
# #    55    56    11    58    64    60    63    22
# 
# # Calculate number of istoypes sampled in each tree
# isotype_counts = unlist(lapply(clones$data, function(x)
#   length(unique(x@data[[trait]]))))
# 
# # make model file with irreversibility constraints
# # Will prohibit switches from right to left in the "states" vector
# # IGHD to IGHM switching listed as an exception, since this can occur
# makeModelFile(file="isotype_model.txt", states=isotypes,
#   constraints="irrev", exceptions=c("IGHD,IGHM"))
# 
# # Build trees and predict states at internal nodes using maximum parsimony
# trees = getTrees(clones[isotype_counts > 1,], trait=trait, igphyml=igphyml_location,
#   modelfile="isotype_model.txt")
# 
# # show internal node (edge) predictions based on maximum parsimony
# plotTrees(trees, tips=trait, nodes=TRUE, palette="Paired", ambig="grey")[[1]]

## ----eval=TRUE, echo=FALSE, warning=FALSE, message=FALSE----------------------
data(IsotypeTrees)
plotTrees(IsotypeTrees, tips="c_call", nodes=TRUE, palette="Paired", ambig="grey")[[1]]

## ----eval=FALSE, warning=FALSE, message=FALSE---------------------------------
# # Downsample each tree to a tip-to-switch ratio of 10 instead of 20
# # this will reduce the false positive rate but also (likely) power
# switches = findSwitches(trees, permutations=100, trait=trait,
#   igphyml=igphyml_location, fixtrees=TRUE, tip_switch=10,
#   modelfile="isotype_model.txt")
# 
# # didn't have much effect for this dataset
# sp = testSP(switches$switches, alternative="greater")
# print(sp$means,n=42)
# # A tibble: 42 × 8
# # Groups:   FROM [7]
# #   FROM  TO      RECON PERMUTE   PGT     DELTA STAT   REPS
# #   <chr> <chr>   <dbl>   <dbl> <dbl>     <dbl> <chr> <int>
# # 1 IGHA1 IGHA2 0.0972  0.0994   0.56 -0.00219  SP      100
# # 2 IGHA1 IGHD  0       0        1     0        SP      100
# # 3 IGHA1 IGHG1 0       0        1     0        SP      100
# # 4 IGHA1 IGHG2 0.00815 0.00833  0.58 -0.000184 SP      100
# # 5 IGHA1 IGHG3 0       0        1     0        SP      100
# # 6 IGHA1 IGHG4 0.00352 0.00409  0.59 -0.000575 SP      100
# # 7 IGHA2 IGHA1 0       0        1     0        SP      100
# # 8 IGHA2 IGHD  0       0        1     0        SP      100
# # 9 IGHA2 IGHG1 0       0        1     0        SP      100
# #10 IGHA2 IGHG2 0       0        1     0        SP      100
# #11 IGHA2 IGHG3 0       0        1     0        SP      100
# #12 IGHA2 IGHG4 0       0        1     0        SP      100
# #13 IGHD  IGHA1 0       0        1     0        SP      100
# #14 IGHD  IGHA2 0       0        1     0        SP      100
# #15 IGHD  IGHG1 0.0144  0.0127   0.42  0.00173  SP      100
# #16 IGHD  IGHG2 0.00926 0.00981  0.54 -0.000545 SP      100
# #17 IGHD  IGHG3 0.0157  0.0113   0.3   0.00440  SP      100
# #18 IGHD  IGHG4 0.00203 0.00271  0.58 -0.000681 SP      100
# #19 IGHG1 IGHA1 0.00632 0.00757  0.59 -0.00126  SP      100
# #20 IGHG1 IGHA2 0.00326 0.00419  0.51 -0.000932 SP      100
# #21 IGHG1 IGHD  0       0        1     0        SP      100
# #22 IGHG1 IGHG2 0.0417  0.0545   0.86 -0.0127   SP      100
# #23 IGHG1 IGHG3 0       0        1     0        SP      100
# #24 IGHG1 IGHG4 0.0451  0.0493   0.66 -0.00427  SP      100
# #25 IGHG2 IGHA1 0       0        1     0        SP      100
# #26 IGHG2 IGHA2 0.00542 0.00511  0.45  0.000316 SP      100
# #27 IGHG2 IGHD  0       0        1     0        SP      100
# #28 IGHG2 IGHG1 0       0        1     0        SP      100
# #29 IGHG2 IGHG3 0       0        1     0        SP      100
# #30 IGHG2 IGHG4 0.0715  0.0581   0.08  0.0133   SP      100
# #31 IGHG3 IGHA1 0.0716  0.0600   0.16  0.0116   SP      100
# #32 IGHG3 IGHA2 0.0508  0.0492   0.37  0.00154  SP      100
# #33 IGHG3 IGHD  0       0        1     0        SP      100
# #34 IGHG3 IGHG1 0.159   0.181    0.96 -0.0225   SP      100
# #35 IGHG3 IGHG2 0.228   0.203    0.05  0.0250   SP      100
# #36 IGHG3 IGHG4 0.161   0.175    0.8  -0.0137   SP      100
# #37 IGHG4 IGHA1 0       0        1     0        SP      100
# #38 IGHG4 IGHA2 0.00596 0.00437  0.27  0.00159  SP      100
# #39 IGHG4 IGHD  0       0        1     0        SP      100
# #40 IGHG4 IGHG1 0       0        1     0        SP      100
# #41 IGHG4 IGHG2 0       0        1     0        SP      100
# #42 IGHG4 IGHG3 0       0        1     0        SP      100

## ----eval=FALSE, warning=FALSE, message=FALSE---------------------------------
# sp = testSP(switches$switches, alternative="greater", to="IGHA2")
# print(sp$means)
# # A tibble: 8 × 8
# # Groups:   FROM [8]
# #  FROM  TO     RECON PERMUTE   PGT    DELTA STAT   REPS
# #  <chr> <chr>  <dbl>   <dbl> <dbl>    <dbl> <chr> <int>
# #1 IGHA1 IGHA2 0.620   0.634   0.59 -0.0144  SP      100
# #2 IGHD  IGHA2 0       0       1     0       SP      100
# #3 IGHE  IGHA2 0       0       1     0       SP      100
# #4 IGHG1 IGHA2 0.0116  0.0292  0.85 -0.0176  SP      100
# #5 IGHG2 IGHA2 0.0371  0.0252  0.32  0.0119  SP      100
# #6 IGHG3 IGHA2 0.297   0.283   0.45  0.0144  SP      100
# #7 IGHG4 IGHA2 0.0346  0.0290  0.36  0.00563 SP      100
# #8 IGHM  IGHA2 0       0       1     0       SP      100

Try the dowser package in your browser

Any scripts or data that you put into this service are public.

dowser documentation built on Nov. 5, 2025, 6:36 p.m.