R/sub__GetTipInfo.R

Defines functions GetHeight2MutNum GetTip2Height GetTip2SNP GetTip2AllMut GetMutInfoFromSeq LoadInRef LoadInCentroid

Documented in LoadInCentroid

#' Subfunctions for GetTipInfo.R
#'
#'

# load in raw ccs data.
LoadInCentroid <- function(FFFly){
  system.file("TreeData", paste0(FFFly,"_centroid.tsv"), package = "NpDynamic") %>%
    read.delim(head=F,col.names=c("TID","Seq"),sep=" ") %>% tbl_df
}

# load in reference sequence.
LoadInRef <- function(){
  "AACTAGCTGGCTAGCAGGTAAACCTGCCAGCCTGCCGGCTCAGGTGAGCCAGTTAGTAGGCAAGTAAGCTCACCTGTAGGGGCTTTGGAGCAGGTATTGGAGTACAGGTGTAGGTTGGAGTTAGCCAGTAGGTTCACCTGATTACCCTGTTATCCCTACAGGTGAGCAGGCTAGCAAGTAGGTTCCAATGCCGGCTGGTAAGCATACCAACTCCAAAGTTCACCTGCAGGTGTAGGTACCTAGGCACCTGCACCTGGGCATAGGTGCTCCTAAGCTAGCAAACCGGTACCTATACTCAGGTGAGCTAGCAAGCTCAGGTGTAGGGATAACAGGGTAATAGCTAACCTACTAGTTGGCTAACCCCAACCAATACTTAGGAGCTGGCAGGCTAGTTTACTAGCTCAGGTGCAGGTGAGTAAGTACACCTGTGCCAGTAAGCACCTAAGCCAACCAGCCCAGGTGAGCCAACTTGCTGGCAAACCTACTGGTATACCATTACCCTGTTATCCCTAAGCTGGTAAGCTTACCCCTATACTCACCTGTGCCAGCCCAGGTGAGCAAGTTGGTATACCCACCTGCAGGTGAGTAGGCTAGTAAGCTAGCTAGTATGCTAGCTGGTTAGTTTGCCGGCTGGCTCCAAAACTAGTTGGTTGGCTCAGGTGTGCCGGTTTAGGGATAACAGGGTAATTGCTCCTACAGGTGAGTAGGCTTACCAGCTCAGGTGAGCAAGCTTGCTCCAATAGGTAGGTTGGAGCATGCCAGTTAGCTTTGGAGCTCAGGTGAGTTTGCCAGTAGGTAAACTAGTATACTTGCTAGCTGGCAAGCCGGTTAGTAGGCTCCTAATTACCCTGTTATCCCTACCAAAACCTGCCCCTAAGCTAGTATAGGAGCCGGTTAGCCAACCAGTACCAACCTAAGCACACCTGAGCTAGCAAACTAGTACCTATACTTGCCAGCAGGCTAGCTTACCAGTAAGTAGGCACAGGTGTGCCCCTAAGCCAGCTGGCAAGCTTAGGGATAACAGGGTAATGGCTGGCTTGCCAGCAGGTTTACCAACTAACCTAGGAACCAACTAACTTGCTCCAAAGCAAGCAAACTCACCTGGGCATGCCCCTAAGCTAGTAAACCCAGGTGAGCAGGTAGGTAAGTTTACCAGCCAACTTACCCAGGTGAACCAGTTCACCTGATTACCCTGTTATCCCTATGCTAGCATACTTGCTTGCCGGCATGCTTGCTAGTACCAAAACTAGCTGGTTGGCACAGGTGGGCTTGCTTAGGCACCTGAGCAGGCAGGCTAGTACCTAAGCCAACCGGCAAGTAAGTTAGTAGGCTCCAAAGTTCAGGTGTTGGAGTTAACTTAGGGATAACAGGGTAATAGTAGGTAGGTTAGCTGGTTAGTAAGCTTGCCTTGGAGCTTGCTAGTTTGCTAGTTTACCAACTAACCGGCAAGTTAACTTTGGCACCTGTTGGTAGGCCTAAGCTTGCCAGCCCACCTGAACCTGCCCAGGTGGGCACACCTGAGTATGCCTTGGATTACCCTGTTATCCCTAAGCACACCTGAGCAAGCTAGTACAGGTGCACCTGCAGGTGCCTACACCTGGGTAGGCTAACTCACCTGTGCCTGCCTGCTGGCACACCTGAACTGGTTGGCACCTATGCCAGCTTGCCAACCGGCTTAGGTAGGTACCAGCCGGTATACTAGCTAACTAACCTAGGGATAACAGGGTAATCACCTGAGTAAACCCCTAGGTAAGTACAGGTGTACCAGCTGGTTGGTTCCAACCTAAGCTTTGGTTGGTGCCGGCTGGTTTACCGGTATACTCCAACACCTGAGCTGGTACCTAGGCTTACTCACCTGCAGGTGGGCTGGTACCTATGCCAACCAACCATTACCCTGTTATCCCTACACCTGTTGGAGCTTTGGCACCTGAGCACACCTGGGCTGGCATGCTTAGGCACCTGGGTAGGCTTAGGCAGGTGAGCAGGCTAGCTGGTAGGTTAGCCGGTACACCTGAGTTTACTCAGGTGCCTAAGCTGGTTTAGGAGCTGGTATAGGGGCATTGGAGCATAGGGATAACAGGGTAATGGCTGGCAGGTTAACCAACTAACCAACTCCTAAGCCGGTAGGCTAGCTAGCATACCTGCTAGCCCCAACACCTGTACCAGCAGGCAAGCTGGCTCCTAAACTAGTACAGGTGAACCTGCCGGCTAGCTAGCTTAGGGGCTAGCCAGTAGGTTATTACCCTGTTATCCCTAAGCTAGCCTGCCAGCTCCTATGCTAGTTAGCAAGCTGGTAGGCTGGCTAGCCTGCCTACTTACCGGTTGGTAGGTAAACCCACCTGAGCATGCCGGTATGCCTAGGGGCTTGCCTGCCAGCCAACCTAGGTGCTGGCACCTATGCCTACTTAGGGATAACAGGGTAATAACTGGCTCCAACACCTGTACTAGCAAGCTTGCCAGCAAGTATAGGCACCTGAGCTAACTAGCTTAGGAACCCACCTGGGCATAGGAACCAGCTAGTTAGCTCCAAAGCTAACCCCTAGGTTGGTTTGCCAGCACACCTGTACTTACCCACCTGTACTATTACCCTGTTATCCCTAAGTTAACTCCTAAGCCCACCTGTACCAACCAGTAGGCATTGGAGTTGGCTGGTACCTAGGCTGGCTAGCCAGCTGGTAAGCAAGCAAGTTTACCCAGGTGGGCTCCTACAGGTGAGCTCCTAAGCTCACCTGGGTACCAAGGCTGGCAAGCAAGCCTAGGGATAACAGGGTAATAGCTGGCTAGTTGGTAGGCTAGCTTAGGGGCTGGCTAACCAGCAGGTAAGTAAGCACCAAAGCAGGTTGGTAAACCTTGGCAGGTGAGTTGGCTAGCTTTGGAACTAGCCAGTTTACCTAGGAACTAGTTCCTAAGCTAGTAGGTTAGTATCTAGAGGATCTTTGTGAA" %>%
    strsplit("") %>% .[[1]] %>% as.data.frame %>% tbl_df %>%
    mutate(Posi=1:n()) %>% select(Posi,Base=".")
}

# Extract mutation information.
GetMutInfoFromSeq <- function(SSSeq,RRRef){
  strsplit(SSSeq,"") %>% .[[1]] %>% tbl_df %>%
    cbind(RRRef) %>% tbl_df %>%
    mutate(MutInfo=paste0(Base,">",value),MutInfo=replace(MutInfo,Base==value,".")) %>%
    select(Posi,MutInfo)
}

# Map mutation information to tips
GetTip2AllMut <- function(FFFly,RRRawReads,RRRef){
  tmp.out <-
    lapply(1:nrow(RRRawReads$Tip2Seq),function(iii){
      RRRawReads$Tip2Seq$Seq[[iii]] %>%
        GetMutInfoFromSeq(.,RRRef) %>%
        filter(MutInfo!=".") %>% mutate(Tip=RRRawReads$Tip2Seq$Tip[[iii]]) }) %>%
    bind_rows
  # remove hotspot
  if(FFFly=="L5"){
    tmp.out <- tmp.out %>% filter(!(Posi==356 & MutInfo=="G>A"))
  }
  return(tmp.out)
}

# Map SNP information to tips
GetTip2SNP <- function(TTTip2AllMut,TTTreeAnn){
  TTTip2AllMut %>%
    left_join(TTTreeAnn$TipInfo %>% select(Tip,Organ)) %>%
    slice(grep("-",MutInfo,invert=T))
}

# Extract tree height for each tip
GetTip2Height <- function(TTTreeAnn){
  TTTreeAnn$Node2Height %>% filter(Node<=length(TTTreeAnn$Tree$tip.label)) %>%
    left_join(TTTreeAnn$Node2Tips) %>%
    left_join(TTTreeAnn$TipInfo %>% select(Tip,Organ)) %>%
    select(Tip,Organ,NodeHeight)
}

# Fit mutation number and tree height with linear model
GetHeight2MutNum <- function(TTTip2SNP,TTTip2Height){
  TTTip2SNP %>%
    group_by(Tip,Organ) %>% summarise(Count=n()) %>%
    group_by %>%
    left_join(TTTip2Height) %>%
    select(Count,NodeHeight) %>%
    filter(percent_rank(NodeHeight)<=0.95) %>%
    lm
}
shadowdeng1994/NpDynamic documentation built on Dec. 23, 2021, 1:17 a.m.