inst/doc/Mutation-Vignette.R

## ----eval=TRUE, warning=FALSE, message=FALSE----------------------------------
# Import required packages
library(alakazam)
library(dplyr)
library(ggplot2)
library(shazam)

# Load and subset example data
data(ExampleDb, package="alakazam")
db <- subset(ExampleDb, c_call %in% c("IGHA", "IGHG") & sample_id == "+7d")

## ----eval=TRUE----------------------------------------------------------------
# Calculate R and S mutation counts
db_obs <- observedMutations(db, sequenceColumn="sequence_alignment",
                            germlineColumn="germline_alignment_d_mask",
                            regionDefinition=NULL,
                            frequency=FALSE, 
                            nproc=1)
# Show new mutation count columns
db_obs %>% 
    select(sequence_id, starts_with("mu_count_")) %>%
    head(n=4)

# Calculate R and S mutation frequencies
db_obs <- observedMutations(db_obs, sequenceColumn="sequence_alignment",
                            germlineColumn="germline_alignment_d_mask",
                            regionDefinition=NULL,
                            frequency=TRUE, 
                            nproc=1)
# Show new mutation frequency columns
db_obs %>% 
    select(sequence_id, starts_with("mu_freq_")) %>%
    head(n=4)

## ----eval=TRUE----------------------------------------------------------------
# Calculate combined R and S mutation frequencies
db_obs <- observedMutations(db, sequenceColumn="sequence_alignment",
                            germlineColumn="germline_alignment_d_mask",
                            regionDefinition=NULL,
                            frequency=TRUE, 
                            combine=TRUE,
                            nproc=1)
# Show new mutation frequency columns
db_obs %>% 
    select(sequence_id, starts_with("mu_freq")) %>%
    head(n=4)

## ----eval=TRUE, warning=FALSE-------------------------------------------------
g1 <- ggplot(db_obs, aes(x=c_call, y=mu_freq, fill=c_call)) +
        geom_boxplot() + 
        labs(title = "Total mutations", 
             x = "Isotype", y = "Mutation frequency") +
        scale_fill_manual(name="Isotype", values=IG_COLORS, limits=force) +
        theme_bw() 
plot(g1)

## ----eval=TRUE----------------------------------------------------------------
# Calculate R and S mutation counts for individual CDRs and FWRs
db_obs_v <- observedMutations(db, sequenceColumn="sequence_alignment",
                              germlineColumn="germline_alignment_d_mask",
                              regionDefinition=IMGT_V_BY_REGIONS,
                              frequency=FALSE, 
                              nproc=1)
# Show new FWR mutation columns
db_obs_v %>% 
    select(sequence_id, starts_with("mu_count_fwr")) %>%
    head(n=4)

# Calculate aggregate CDR and FWR V-segment R and S mutation frequencies
db_obs_v <- observedMutations(db_obs_v, sequenceColumn="sequence_alignment",
                              germlineColumn="germline_alignment_d_mask",
                              regionDefinition=IMGT_V,
                              frequency=TRUE, 
                              nproc=1)
# Show new CDR and FWR mutation frequency columns
db_obs_v %>% 
    select(sequence_id, starts_with("mu_freq_")) %>%
    head(n=4)

## ----eval=TRUE, warning=FALSE-------------------------------------------------
g2 <- ggplot(db_obs_v, aes(x=c_call, y=mu_freq_cdr_s, fill=c_call)) +
        geom_boxplot() + 
        labs(title = "CDR silent mutations", 
             x = "Isotype", y = "Mutation frequency") +
        scale_fill_manual(name="Isotype", values=IG_COLORS, limits=force) +
        theme_bw()
g3 <- ggplot(db_obs_v, aes(x=c_call, y=mu_freq_cdr_r, fill=c_call)) +
        geom_boxplot() + 
        labs(title = "CDR replacement mutations", 
           x = "Isotype", y = "Mutation frequency") +
        scale_fill_manual(name="Isotype", values=IG_COLORS, limits=force) +
        theme_bw()
alakazam::gridPlot(g2, g3, ncol=2)

## ----eval=TRUE----------------------------------------------------------------
# Calculate charge mutation frequency for the full sequence
db_obs_ch <- observedMutations(db, sequenceColumn="sequence_alignment",
                               germlineColumn="germline_alignment_d_mask",
                               regionDefinition=NULL,
                               mutationDefinition=CHARGE_MUTATIONS,
                               frequency=TRUE, 
                               nproc=1)
# Show new charge mutation frequency columns
db_obs_ch %>% 
    select(sequence_id, starts_with("mu_freq_")) %>%
    head(n=4)

## ----eval=TRUE, warning=FALSE-------------------------------------------------
g4 <- ggplot(db_obs_ch, aes(x=c_call, y=mu_freq_seq_r, fill=c_call)) +
        geom_boxplot() + 
        labs(title="Charge replacement mutations", 
             x="Isotype", y="Mutation frequency") +
        scale_fill_manual(name="Isotype", values=IG_COLORS, limits=force) + 
        theme_bw()
plot(g4)

Try the shazam package in your browser

Any scripts or data that you put into this service are public.

shazam documentation built on Oct. 3, 2023, 1:06 a.m.