R/Echidna_other_functions.R
In Platypus: Single-Cell Immune Repertoire and Gene Expression Analysis

Documented in clonofreq clonofreq.isotype.data clonofreq.isotype.plot clonofreq.trans.data clonofreq.trans.plot cluster.id.igraph get.avr.mut.data get.avr.mut.plot get.barplot.errorbar get.elbow get.n.node.data get.n.node.plot get.seq.distance get.umap get.vgu.matrix no.empty.node select.top.clone umap.top.highlight

#' Plot the top abundant clonal frequencies in a barplot with ggplot2
#' @title Plot clonal frequency barplot of the outout simulated data
#' @param clonotypes The clonotypes dataframe, which is the second element in the simulation output list.
#' @param top.n The top n abundant clones to be shown in the plot. If missing, all clones will be shown.
#' @param y.limit The upper limit for y axis in the plot.
#' @return top abundant clonal frequencies in a barplot with ggplot2
#' @export
clonofreq<-function(clonotypes, top.n, y.limit){
  Clonotypes<-frequency <- NULL
  #require(ggplot2,dplyr)
  clonotypes<-clonotypes[order(clonotypes[,2],decreasing = T),]
  if(!(missing(top.n))) {
    clonotypes<-clonotypes[1:top.n,]
  }
  clonotypes$Clonotypes<-factor(c(1:nrow(clonotypes)))
  p<- ggplot2::ggplot( clonotypes, ggplot2::aes(x=Clonotypes, y=frequency)) +
    ggplot2::xlab("Clonotypes")+
    ggplot2::ylab("Cells")+
    ggplot2::geom_bar(stat="identity")+
    ggplot2::theme_classic()+
    ggplot2::theme(axis.text.x = ggplot2::element_text(angle = 45, hjust = 1))+
    ggplot2::theme(text = ggplot2::element_text(size=20))+
    ggplot2::scale_x_discrete(expand = c(0,0))

    if(!missing(y.limit)){

    p<- ggplot2::ggplot( clonotypes, ggplot2::aes(x=Clonotypes, y=frequency)) +
    ggplot2::xlab("Clonotypes")+
    ggplot2::ylab("Cells")+
    ggplot2::geom_bar(stat="identity")+
    ggplot2::theme_classic()+
    ggplot2::theme(axis.text.x = ggplot2::element_text(angle = 45, hjust = 1))+
    ggplot2::theme(text = ggplot2::element_text(size=20))+
    ggplot2::scale_x_discrete(expand = c(0,0))+
    ggplot2::scale_y_continuous(expand = c(0,0),limit = c(0, y.limit))

    }


  return(p)
}

#' Plot a stacked barplot for clonotype counts grouped by isotype.
#' @title Get information about the clonotype counts grouped by isotype.
#' @param all.contig.annotations The output dataframe all_contig_annotation from function simulate.repertoire.
#' @param top.n The top n abundant clones to be shown in the plot. If missing, all clones will be shown.
#' @param colors A named character vector of colors, the names are the isotypes. If missing, the default has 11 colors coresponding to the default isotype names.
#' @param y.limit The upper limit for y axis in the plot.
#' @return a stacked barplot for clonotype counts grouped by isotype
#' @export
clonofreq.isotype.plot<-function(all.contig.annotations,top.n,y.limit,colors){
  #require(dplyr,ggplot2)
  Clonotypes<-c_gene<-count<-NULL
  if(missing(colors))  {
    colors<-c("#377EB8","#4DAF4A",	"#984EA3",	"#FF7F00",	"#FFFF33",	"#A65628",	"#F781BF",	"#999999","#E41A1C","#984EA3",	"#FFFF33")
    names(colors)<-c("IGHM","IGHD","IGHG1","IGHG2A","IGHG2B","IGHG2C","IGHG3","IGHE",	"IGHA",	"IGHG2",	"IGHG4")
  }
  heavy_df<-all.contig.annotations[grep(all.contig.annotations$c_gene,pattern = "IGH"),]
  clonotypes<-as.data.frame(table(heavy_df$raw_clonotype_id))
  colnames(clonotypes)[1]<-"raw_clonotype_id"
  clonotypes<-clonotypes[order(clonotypes$Freq,decreasing = T),]
  clonotypes$Clonotypes<-factor(c(1:nrow(clonotypes)))
  heavy_df<-merge(heavy_df,clonotypes,all = T)
    if(!missing(top.n)){
    clonotypes<-clonotypes[1:top.n,]
    heavy_df<-heavy_df[heavy_df$raw_clonotype_id %in% clonotypes$raw_clonotype_id,]
    }


  iso<-dplyr::summarise(dplyr::group_by(heavy_df,Clonotypes,c_gene),count = dplyr::n())
  p<-ggplot2::ggplot(iso,ggplot2::aes(x=Clonotypes, y=count, fill = c_gene))+
    ggplot2::ylab("Cells")+
    ggplot2::xlab("Clonotypes")+
    ggplot2::geom_bar(stat="identity")+
    ggplot2::theme_classic()+
    ggplot2::theme(axis.text.x = ggplot2::element_text(angle = 45, hjust = 1))+
    ggplot2::theme(text = ggplot2::element_text(size=20))+
    ggplot2::scale_x_discrete(expand = c(0,0))+
    ggplot2::scale_y_continuous(expand = c(0,0),limit = c(0, y.limit))+
    ggplot2::scale_fill_manual(values = colors,name = "Isotype")
  return(p)
}

#'Return
#' @title Get information about the clonotype counts grouped by isotype.
#' @param all.contig.annotations The output dataframe all_contig_annotation from function simulate.repertoire.
#' @param top.n The top n abundant clones to be shown in the plot. If missing, all clones will be shown.
#' @return dataframes containing the top n abundant clonotypes and their frequency and isotype information for further processing.
#' @export
clonofreq.isotype.data<-function(all.contig.annotations,top.n){
  #require(dplyr)
  raw_clonotype_id<-c_gene<-NULL
  heavy_df<-all.contig.annotations[grep(all.contig.annotations$c_gene,pattern = "IGH"),]

  clonotypes<-as.data.frame(table(heavy_df$raw_clonotype_id))
  colnames(clonotypes)[1]<-"raw_clonotype_id"
  heavy_df<-merge(heavy_df,clonotypes,all = T)
  clonotypes<-clonotypes[order(clonotypes$Freq,decreasing = T),]

  if(!missing(top.n)){
    clonotypes<-clonotypes[1:top.n,]
    heavy_df<-heavy_df[heavy_df$raw_clonotype_id %in% clonotypes$raw_clonotype_id,]
  }

  iso<-dplyr::summarise(dplyr::group_by(heavy_df,raw_clonotype_id,c_gene),count = dplyr::n())
  return(iso)
}


#' Return a barplot of mean and standard error bar of certain value of each clone.
#' @param data A dataframe. Columns are different simulations, rows are the top clones. The first row is the top abundant clone.
#' @param y.lab A string specifies the y lable name of the barplot.
#' @param y.limit The upper limit for y axis in the plot.
#' @return a barplot of mean and standard error bar of certain value of each clone.
#' @export
get.barplot.errorbar<-function(data,y.lab,y.limit){
  sd<-NULL
  #require(ggplot2)
  Clones<-as.factor(c(1:nrow(data)))
  Mean<-rowMeans(data)
  se<-apply(data, 1,sd)/ncol(data)^0.5
  df<-data.frame(Clones,Mean,se)
  p<-ggplot2::ggplot(df,ggplot2::aes(x=Clones, y=Mean)) +
    ggplot2::ylab(y.lab)+
    ggplot2::xlab("Clonotypes")+
    ggplot2::geom_bar(stat="identity",  position=ggplot2::position_dodge())+
    ggplot2::geom_errorbar(ggplot2::aes(ymin=Mean-se, ymax=Mean+se), width=.2,position=ggplot2::position_dodge(.9))+
    ggplot2::theme_classic()+
    ggplot2::theme(axis.text.x = ggplot2::element_text(angle = 45, hjust = 1))+
    ggplot2::theme(text = ggplot2::element_text(size=20))+
    ggplot2::scale_x_discrete(expand = c(0,0))+
    ggplot2::scale_y_continuous(expand = c(0,0),limit = c(0, y.limit))
  return(p)
}

#' Get the index of top ranking clones.
#' @param clonotypes The output "clonotypes" dataframe from simulation output.
#' @param top.n The top n abundant clones to be selected.
#' @return a vector of indexes of top ranking clones
#' @export
select.top.clone<-function(clonotypes,top.n){
  #require(dplyr,stringr)
  clonotypes<-clonotypes[order(clonotypes$frequency,decreasing = T),][1:top.n,]
  selected<-as.numeric(stringr::str_replace(string=clonotypes$clonotype_id,pattern = "clonotype",replacement = ""))
  return(selected)
}

#' Get the number of unique variants in each clone in a vector and the barplot. The first item in the output is the vector representing the numbers of unique variants, the second item is the barplot.
#' @param igraph.index.attr The output "igraph.index.attr" list from simulation output.
#' @param clonotype.select The index of the clones to be shown. If missing, all clones will be included.
#' @param y.limit The upper limit for y axis in the plot.
#' @return the number of unique variants in each clone in a vector and the barplot. The first item in the output is the vector representing the numbers of unique variants, the second item is the barplot.
#' @export
get.n.node.plot<-function(igraph.index.attr,clonotype.select,y.limit){
 #require(ggplot2)
  Clonotypes<-unique_variants<-NULL
  n_node<-c()
  for(i in 1:length(igraph.index.attr)){
    if(length(igraph.index.attr[[i]])>1){
    n_node[i]<-nrow(unique(igraph.index.attr[[i]]))
    }else
      n_node[i]<-1
  }
  if(!(missing(clonotype.select))){
    n_node<-n_node[clonotype.select]
  }
  df<-data.frame(factor(c(1:length(n_node))),n_node)
  colnames(df)<-c("Clonotypes","unique_variants")
  p<- ggplot2::ggplot(df, ggplot2::aes(x=Clonotypes, y=unique_variants)) +
    ggplot2::xlab("Clonotypes")+
    ggplot2::ylab("Unique Variants")+
    ggplot2::geom_bar(stat="identity")+
    ggplot2::theme_classic()+
    ggplot2::theme(axis.text.x = ggplot2::element_text(angle = 45, hjust = 1))+
    ggplot2::theme(text = ggplot2::element_text(size=20))+
    ggplot2::scale_x_discrete(expand = c(0,0))+
    ggplot2::scale_y_continuous(expand = c(0,0),limit = c(0, y.limit))
  return(p)
}

#' Get the number of unique variants in each clone in a vector. The output is the vector representing the numbers of unique variants.
#' @param data The output "igraph.index.attr" list from simulation output.
#' @param clonotype.select The index of the clones to be shown. If missing, all clones will be included.
#' @return the number of unique variants in each clone in a vector. The output is the vector representing the numbers of unique variants.
#' @export
get.n.node.data<-function(data,clonotype.select){
  n_node<-c()
  for(i in 1:length(data)){
    if(length(data[[i]])>1){
      n_node[i]<-nrow(unique(data[[i]]))
    }else
      n_node[i]<-1
  }
  if(!(missing(clonotype.select))){
    n_node<-n_node[clonotype.select]
  }
  df<-data.frame(paste0("clonotype",clonotype.select),n_node)
  colnames(df)<-c("clonotype_id","unique_variants")
  return(n_node)
}

#' Get the seurat object from simulated transciptome output.
#' @param data The output "transcriptome" dataframe from simulation output.
#' @return the seurat object from simulated transciptome output.
#' @export

get.elbow<-function(data){
  gex <- Seurat::CreateSeuratObject(counts = data)
  gex <- Seurat::NormalizeData(gex, normalization.method = "LogNormalize", scale.factor = 10000)
  gex <- Seurat::FindVariableFeatures(gex, selection.method = "vst", nfeatures = 2000)
  all.genes <- rownames(gex)
  gex <- Seurat::ScaleData(gex,features = all.genes)
  gex <- Seurat::RunPCA(gex, features = Seurat::VariableFeatures(object = gex))
  return(gex)
}

#' Further process the seurat object from simulated transciptome output and make UMAP ready for plotting.
#' @param gex output from get.elbow function.
#' @param d dims argurment of in Seurat::FindNeighbors() and Seurat::RunUMAP
#' @param reso resolution argument in Seurat::FindClusters()
#' @return Further processed seurat object from simulated transciptome output with UMAP ready for plotting.
#' @export
get.umap<-function(gex,d,reso){
  #require(Seurat)
  gex <- Seurat::FindNeighbors(gex, dims = 1:d)
  gex <- Seurat::FindClusters(gex, resolution = reso)
  gex <- Seurat::RunUMAP(gex, dims = 1:d)
  return(gex)
}





#' Computing sequence distance according to the number of unmatched bases.
#' @param germline A string representing the germline sequence.
#' @param sequence  A string of the sequence to be compared, which has the same length as germline.
#' @return the number of unmatched bases in 2 sequences.
#' @export
get.seq.distance<-function(germline, sequence){

  a<-strsplit(germline,split = "")[[1]]
  b<-strsplit(sequence,split = "")[[1]]
  dis<-sum(a!=b)
  return(dis)
}

#' Get information about somatic hypermutation in the simulation. This function return a barplot showing the average mutation.
#' @param igraph.index.attr A list "igraph.index.attr" from the simulation output.
#' @param history  A dataframe "history" from the simulation output.
#' @param clonotype.select The selected clonotype index, can be the output of the function "select.top.clone".
#' @param level Can be "node" or "cell". If "node", the function will return average mutation on unique variant level. Otherwise it will return on cell level.
#' @param y.limit The upper limit for y axis in the plot.
#' @return a barplot showing the average mutation per node (same heavy and light chain set) or per cell.
#' @export
get.avr.mut.plot<-function(igraph.index.attr,history,clonotype.select,level, y.limit){
  #require(stats,ggplot2)
  Clonotypes<-Mean<-Se<-NULL
  history<-as.data.frame(history)
  mut_by_node_mean<-c()
  mut_by_node_se<-c()
  mut_by_cell_mean<-c()
  mut_by_cell_se<-c()

  for (u in 1:length(clonotype.select)){

    dis_vec<-c()
    i<-clonotype.select[u]
    if ( length(igraph.index.attr[[i]])>1){
      igraph.index.attr[[i]]<-igraph.index.attr[[i]][-1,]
      rownames(igraph.index.attr[[i]])<-c(1:nrow(igraph.index.attr[[i]]))
      germ_seq_num<-igraph.index.attr[[i]][1,1]
      germ_seq<-history[which(history[,1]==germ_seq_num)[1],3]
      mut_seq_num<-igraph.index.attr[[i]][-1,1]

      for (j in 1:length(mut_seq_num)){
        mut_seq<-history[which(history[,1]==mut_seq_num[j])[1],3]
        dis_vec[j]<-get.seq.distance(germ_seq,mut_seq)
      }
      mut_by_node_mean[u]<-mean(dis_vec)
      mut_by_node_se[u]<-stats::sd(dis_vec)/length(dis_vec)^0.5
      mut_by_cell_mean[u]<-mean(rep(dis_vec,igraph.index.attr[[i]][["size"]][-1]))
      mut_by_cell_se[u]<-stats::sd(rep(dis_vec,igraph.index.attr[[i]][["size"]][-1]))/sum(igraph.index.attr[[i]][["size"]][-1])^0.5
    }

    if ( length(igraph.index.attr[[i]])==1){
      mut_by_node_mean[u]<-0
      mut_by_node_se[u]<-0
      mut_by_cell_mean[u]<-0
      mut_by_cell_se[u]<-0
    }
  }
  if(level=="node"){
  mut<-data.frame(clonotype.select,mut_by_node_mean,mut_by_node_se)
  }
  if(level=="cell"){
  mut<-data.frame(clonotype.select,mut_by_cell_mean,mut_by_cell_se)
  }
  colnames(mut)<-c("clonotype_id","Mean","Se")
  mut$Clonotypes<-factor(c(1:nrow(mut)))
  p<- ggplot2::ggplot(mut, ggplot2::aes(x=Clonotypes, y=Mean)) +
    ggplot2::xlab("Clonotypes")+
    ggplot2::ylab("Average mutation")+
    ggplot2::geom_bar(stat="identity", position=ggplot2::position_dodge())+
    ggplot2::geom_errorbar( ggplot2::aes(ymin=Mean-Se, ymax=Mean+Se), width=.2,
                            position= ggplot2::position_dodge(.9))+ ggplot2::theme_minimal()+
    ggplot2::theme_classic()+
    ggplot2::theme(axis.text.x =  ggplot2::element_text(angle = 45, hjust = 1))+
    ggplot2::theme(text = ggplot2::element_text(size=20))+
    ggplot2::scale_x_discrete(expand = c(0,0))+
    ggplot2::scale_y_continuous(expand = c(0,0),limit = c(0, y.limit))
  return(p)
}

#' Get information about somatic hypermutation in the simulation. This function return a barplot showing the average mutation.
#' @param igraph.index.attr A list "igraph.index.attr" from the simulation output.
#' @param history  A dataframe "history" from the simulation output.
#' @param clonotype.select The selected clonotype index, can be the output of the function "select.top.clone.clone".
#' @param level Can be "clone" or "cell". If "clone", the function will return average mutation on unique variant level. Otherwise it will return on cell level.
#' @return a bar plot showing the average mutation on clone or cell level.
#' @export
get.avr.mut.data<-function(igraph.index.attr,history,clonotype.select,level){
  #require(stats)
  history<-as.data.frame(history)
  mut_by_node_mean<-c()
  mut_by_node_se<-c()
  mut_by_cell_mean<-c()
  mut_by_cell_se<-c()

  for (u in 1:length(clonotype.select)){
    dis_vec<-c()
    i<-clonotype.select[u]

    if ( length(igraph.index.attr[[i]])>1){
      igraph.index.attr[[i]]<-igraph.index.attr[[i]][-1,]
      rownames(igraph.index.attr[[i]])<-c(1:nrow(igraph.index.attr[[i]]))

      germ_seq_num<-igraph.index.attr[[i]][1,1]
      germ_seq<-history[which(history[,1]==germ_seq_num)[1],3]
      mut_seq_num<-igraph.index.attr[[i]][-1,1]

      for (j in 1:length(mut_seq_num)){
        mut_seq<-history[which(history[,1]==mut_seq_num[j])[1],3]
        dis_vec[j]<-get.seq.distance(germ_seq,mut_seq)
      }
      mut_by_node_mean[u]<-mean(dis_vec)
      mut_by_node_se[u]<-stats::sd(dis_vec)/length(dis_vec)^0.5
      mut_by_cell_mean[u]<-mean(rep(dis_vec,igraph.index.attr[[i]][["size"]][-1]))
      mut_by_cell_se[u]<-stats::sd(rep(dis_vec,igraph.index.attr[[i]][["size"]][-1]))/sum(igraph.index.attr[[i]][["size"]][-1])^0.5
    }

    if ( length(igraph.index.attr[[i]])==1){
      mut_by_node_mean[u]<-0
      mut_by_node_se[u]<-0
      mut_by_cell_mean[u]<-0
      mut_by_cell_se[u]<-0
    }
  }
  if(level=="node"){
    mut<-data.frame(clonotype.select,mut_by_node_mean,mut_by_node_se)
  }
  if(level=="cell"){
    mut<-data.frame(clonotype.select,mut_by_cell_mean,mut_by_cell_se)
  }
  return(mut)
}

#' Get paired v gene heavy chain and light chain matrix on clonotype level. A v gene usage pheatmap can be obtain by p<-pheatmap::pheatmap(vgu_matrix,show_colnames= T, main = "V Gene Usage"), where the vgu_matrix is the output of this function.
#' @param all.contig.annotations The dataframe "all_contig_annotation" from simulation output.
#' @param level Can be "clone" or "cell". If "clone", the function will return paired v gene usage matrix on clonotype level. Otherwise it will return on cell level.
#' @return a paired v gene heavy chain and light chain matrix on clonotype level.
#' @export
get.vgu.matrix<-function(all.contig.annotations, level){
  #require(stringr,reshape2)
  h_df<-subset(all.contig.annotations, stringr::str_detect(pattern="contig_1",string=all.contig.annotations$raw_contig_id),select= c("barcode","raw_clonotype_id","v_gene"))
  l_df<-subset(all.contig.annotations, stringr::str_detect(pattern="contig_2",string=all.contig.annotations$raw_contig_id),select= c("barcode","v_gene"))
  colnames(h_df)<-stringr::str_replace(string = colnames(h_df),pattern = "v_gene",replacement = "v_h")
  colnames(l_df)<-stringr::str_replace(string = colnames(l_df),pattern = "v_gene",replacement = "v_l")
  df<-merge(h_df,l_df,by="barcode")
  if(level=="clone"){
    vgu_matrix<-reshape2::dcast(unique(df[,-1]),v_h~v_l,value.var = "raw_clonotype_id",length)
  }
  if(level=="cell"){
    vgu_matrix<-reshape2::dcast(df,v_h~v_l,value.var = "barcode",length)
  }
  rownames(vgu_matrix)<-vgu_matrix[,1]
  vgu_matrix<-as.matrix(vgu_matrix[,-1])
  return(vgu_matrix)
}

#' Plot a stacked barplot for clonotype counts grouped by transcriptome state(cell type).
#' @title Get information about the clonotype counts grouped by transcriptome state(cell type).
#' @param all.contig.annotations The output dataframe all_contig_annotation from function simulate.repertoire.
#' @param history The dataframe history from simulate output.
#' @param trans.names The names of cell types which are used in transcriptome.switch.prob argument in the simulation.
#' @param top.n The top n abundant clones to be shown in the plot. If missing, all clones will be shown.
#' @param y.limit The upper limit for y axis in the plot.
#' @param colors A named character vector of colors, the names are the isotypes. If missing, the default has 11 colors coresponding to the default isotype names.
#' @return a stacked barplot for clonotype counts grouped by transcriptome state(cell type).
#' @export
clonofreq.trans.plot<-function(all.contig.annotations,history,trans.names,top.n,y.limit,colors){
  #require(stringr,dplyr,ggplot2)
  Clonotypes<-trans_names<-count<-NULL
  if(missing(colors)) {
    colors<-c("#377EB8","#4DAF4A",	"#984EA3",	"#FF7F00","#FFFF33", "#A65628", "#F781BF", "#999999")[1:length(trans.names)]
    names(colors)<-trans.names
  }
  trans.names<-data.frame(c(1:length(trans.names)),trans.names)
  colnames(trans.names)<-c("trans_state_his","trans_names")
  heavy_df<-all.contig.annotations[grep(all.contig.annotations$c_gene,pattern = "IGH"),]
  history<-as.data.frame(history)
  colnames(history)[stringr::str_detect(pattern="barcode",string=colnames(history))]<-"barcode"

  clonotypes<-as.data.frame(table(heavy_df$raw_clonotype_id))
  colnames(clonotypes)[1]<-"raw_clonotype_id"
  clonotypes<-clonotypes[order(clonotypes$Freq,decreasing = T),]
  clonotypes$Clonotypes<-factor(c(1:nrow(clonotypes)))

  heavy_df<-merge(heavy_df,clonotypes,all = T)
  heavy_df<-merge(heavy_df, history, by="barcode",all.x = T)
  heavy_df<-merge(heavy_df, trans.names,all.x = T)
  if(!missing(top.n)){
    clonotypes<-clonotypes[1:top.n,]
    heavy_df<-heavy_df[heavy_df$raw_clonotype_id %in% clonotypes$raw_clonotype_id,]
  }
  iso<-dplyr::summarise(dplyr::group_by(heavy_df,Clonotypes,trans_names),count = dplyr::n())
  p<-ggplot2::ggplot(iso,ggplot2::aes(x=Clonotypes, y=count, fill = "trans_names"))+
    ggplot2::ylab("Cells")+
    ggplot2::xlab("Clonotypes")+
    ggplot2::geom_bar(stat="identity")+
    ggplot2::theme_classic()+
    ggplot2::theme(axis.text.x = ggplot2::element_text(angle = 45, hjust = 1))+
    ggplot2::theme(text = ggplot2::element_text(size=20))+
    ggplot2::scale_x_discrete(expand = c(0,0))+
    ggplot2::scale_y_continuous(expand = c(0,0),limit = c(0, y.limit))+
    ggplot2::scale_fill_manual(values = colors,name="Cell Types")
  return(p)
}

#' Dataframe with clonotype counts grouped by transcriptome state(cell type).
#' @title Get information about the clonotype counts grouped by transcriptome state(cell type).
#' @param all.contig.annotations The output dataframe all_contig_annotation from function simulate.repertoire.
#' @param history The dataframe history from simulate output.
#' @param trans.names The names of cell types which are used in transcriptome.switch.prob argument in the simulation.
#' @param top.n The top n abundant clones to be shown in the plot. If missing, all clones will be shown.
#' @return a dataframe with clonotype counts grouped by transcriptome state(cell type).
#' @export
clonofreq.trans.data<-function(all.contig.annotations,history,trans.names,top.n){
  #require(stringr,dplyr)
  Clonotypes<-trans_names<-NULL
  trans.names<-data.frame(c(1:length(trans.names)),trans.names)
  colnames(trans.names)<-c("trans_state_his","trans_names")
  heavy_df<-all.contig.annotations[grep(all.contig.annotations$c_gene,pattern = "IGH"),]
  history<-as.data.frame(history)
  colnames(history)[stringr::str_detect(pattern="barcode",string=colnames(history))]<-"barcode"

  clonotypes<-as.data.frame(table(heavy_df$raw_clonotype_id))
  colnames(clonotypes)[1]<-"raw_clonotype_id"
  clonotypes<-clonotypes[order(clonotypes$Freq,decreasing = T),]
  clonotypes$Clonotypes<-factor(c(1:nrow(clonotypes)))

  heavy_df<-merge(heavy_df,clonotypes,all = T)
  heavy_df<-merge(heavy_df, history, by="barcode",all.x = T)
  heavy_df<-merge(heavy_df, trans.names,all.x = T)
  if(!missing(top.n)){
    clonotypes<-clonotypes[1:top.n,]
    heavy_df<-heavy_df[heavy_df$raw_clonotype_id %in% clonotypes$raw_clonotype_id,]
  }
  iso<-dplyr::summarise(dplyr::group_by(heavy_df,Clonotypes,trans_names),count = dplyr::n())
  return(iso)
}



#' Set idents for top abundant clones in Seurat object, get ready for highlight the top abundant clones in UMAP.
#' @param gex output from get.umap function.
#' @param all.contig.annotations The output dataframe all_contig_annotations from simulation.
#' @param top.n The top n abundant clones to be shown in the plot. If missing, all clones will be shown.
#' @return a Seurat object ready for highlight the top abundant clones in UMAP
#' @export

umap.top.highlight<-function(gex,all.contig.annotations,top.n){
  #require(dplyr,Seurat)
  clonotypes<-as.data.frame(table(all.contig.annotations$raw_clonotype_id))
  colnames(clonotypes)[1]<-"raw_clonotype_id"
  clonotypes<-clonotypes[order(clonotypes$Freq,decreasing = T),]
  if(!missing(top.n)){
    clonotypes<-clonotypes[1:top.n,]
  }
  top<-list()
  Seurat::Idents(gex)<-"Unselected"
  for (i in 1: nrow(clonotypes)){
    top[[i]]<-all.contig.annotations$barcode[which(all.contig.annotations$raw_clonotype_id==clonotypes$raw_clonotype_id[i])]
    gex<- Seurat::SetIdent(object = gex, cells = top[[i]], value =paste0("CloneRank",i))
  }
  return(gex)
}

#' Get clone network igraphs colored by seurat cluster id.
#' @param meta.data the meta.data dataframe from the Seurat object of the simulation. The object should be pre-processed and has cluster ids in the meta.data.
#' @param history The dataframe 'history' from the simulation output.
#' @param igraph.index The list 'igraph.index' from the simulation output.
#' @param empty.node If TRUE, there will be empty node in igraph. if FALSE, the empty node will be deleted.
#' @return a list of clone network igraphs colored by seurat cluster id
#' @export
cluster.id.igraph<-function(meta.data,history,igraph.index,empty.node){
  #require(reshape2,stats,igraph)
  colors<-colors
  igraph.index.attr<-list()
  igraph.index.jr<-list()
  igraph_list<-list()
  pie.values.list<-list()
  igraph_list_trans<-list()
  pie_values_trans_list<-list()
  #size.of.vertex
  size.of.vertex<-as.data.frame(stats::aggregate(barcode.history~seq.number,history,length,na.action = stats::na.pass))
  size.of.vertex1<-as.data.frame(stats::aggregate(barcode.history~seq.number,history,length))
  size.of.vertex<-merge(size.of.vertex1,size.of.vertex,all=T, by="seq.number")[,-3]
  size.of.vertex[!(size.of.vertex$seq.number %in% size.of.vertex1$seq.number),2]<-0
  colnames(size.of.vertex)<-c("seq.number","size")
  rm(size.of.vertex1)
  #meta.data get cluster id
  meta.data$barcode.history<-rownames(meta.data)
  meta.data<-data.frame(meta.data$barcode.history,meta.data$seurat_clusters)
  colnames(meta.data)<-c("barcode.history","seurat.clusters")
  history<-merge(history,meta.data,by="barcode.history",all=T)

  cluster.distribution<-reshape2::dcast(history, seq.number~seurat.clusters, value.var ="seurat.clusters",length)[,-1]
  cluster.distribution<-cluster.distribution[,1:ncol(cluster.distribution)-1]
  cluster.name<-colnames(cluster.distribution)
  cluster.distribution <-as.list( as.data.frame(t(cluster.distribution)))
  #write igraph list and assign attr
  for (i in 1:length(igraph.index)){

    if (length(igraph.index[[i]])>1){
      igraph.index.attr[[i]]<-data.frame(igraph.index[[i]])
      colnames(igraph.index.attr[[i]])<-"seq.number"

      igraph.index.jr[[i]]<-data.frame(igraph.index[[i]])
      colnames(igraph.index.jr[[i]])<-"seq.number"

      #delete duplicated for vertex attribute
      igraph.index.attr[[i]]<-subset(igraph.index.attr[[i]],!duplicated(igraph.index.attr[[i]]$seq.number))
      #size of vertex
      igraph.index.attr[[i]]<-merge(size.of.vertex,igraph.index.attr[[i]],by="seq.number",all.y=T)
      igraph.index.attr[[i]]$size[1]<-0
      igraph.index.attr[[i]]$adj_size<-(igraph.index.attr[[i]]$size*60/(range(igraph.index.attr[[i]]$size)[2]-range(igraph.index.attr[[i]]$size)[1]))^0.5+15 #visual size of vertex a number between 20 and 60
      #size of empty
      igraph.index.attr[[i]]$adj_size<-replace(igraph.index.attr[[i]]$adj_size, igraph.index.attr[[i]]$size==0, 10)

      #simplified index
      igraph.index.jr[[i]]$No<-match(x=igraph.index.jr[[i]]$seq.number,table=igraph.index.attr[[i]]$seq.number)

      #find empty
      size0<-which(igraph.index.attr[[i]]$size==0)

      No<- igraph.index.jr[[i]]$No

      #option: no empty node
      if(empty.node==F&&length(size0[-1])>0){
        No<-.RM.EMPTY.NODE(No,size0[-1])
        igraph.index.attr[[i]]<-igraph.index.attr[[i]][-size0[-1],]
        rownames(igraph.index.attr[[i]])<-c(1:nrow(igraph.index.attr[[i]]))
        size0<-size0[1]
      }

      #match cluster.distribution to each vertex
      pie.values<-list()
      for(j in 2:length(igraph.index.attr[[i]]$seq.number)){
        pie.values[[j]]<-cluster.distribution[[igraph.index.attr[[i]]$seq.number[j]]]
      }
      pie.values.list[[i]]<-pie.values


      #write graph list
      g<-igraph::graph(No,directed = T)
      g$layout<-igraph::layout_as_tree
      igraph::V(g)$label<-igraph.index.attr[[i]]$size
      igraph::V(g)$size<-igraph.index.attr[[i]]$adj_size
      igraph::V(g)$label.dist<-3

      igraph::V(g)$shape<-"pie"

      if(length(size0)>1){
        for(k in 1:length(size0)){
          igraph::V(g)$shape[[size0[k]]]<-"circle"
        }
      }
      if(length(size0)==1 ){
        igraph::V(g)$shape[[size0]]<-"circle"
      }

      igraph::V(g)$pie.color<-list(colors)
      igraph::V(g)$color<-"gray"
      igraph::V(g)$color[1]<-"black"
      ###!!!
      igraph::V(g)$pie<-pie.values.list[[i]]

      igraph_list[[i]]<-g

    }

    if (length(igraph.index[[i]])==1){
      igraph.index.attr[[i]]<-size.of.vertex$size[which(size.of.vertex$seq.number==igraph.index[[i]])]#size
      igraph.index.jr[[i]]<-data.frame(igraph.index[[i]])
      pie.values.list[[i]]<-list(cluster.distribution[igraph.index[[i]]][[1]])

      g<-igraph::graph(c(1,1), edges = NULL)
      g$layout<-igraph::layout_as_tree
      igraph::V(g)$size<-igraph.index.attr[[i]]/30+20
      igraph::V(g)$label<-igraph.index.attr[[i]]
      igraph::V(g)$label.dist<-5
      igraph::V(g)$shape<-"pie"
      igraph::V(g)$pie.color<-list(colors)
      igraph::V(g)$pie<-pie.values.list[[i]]

      igraph_list[[i]]<-g
    }
  }
  return(igraph_list)
}

#' Get clone network igraphs without empty mode. Empty node represents the 'extincted' sequences, that are not in any living cell but once existed.
#' @param history The dataframe 'history' from the simulation output.
#' @param igraph.index The list 'igraph.index' from the simulation output.
#' @param empty.node If TRUE, there will be empty node in igraph. if FALSE, the empty node will be deleted.
#' @return  a list of clone network igraphs without empty mode.
#' @export
no.empty.node<-function(history,igraph.index){
  #require(stats,reshape2)
    igraph.index.attr<-list()
    igraph.index.jr<-list()
    igraph_list<-list()
    igraph_list_trans<-list()
    pie_values_trans_list<-list()
    colors<-c("#66C2A5", "#FC8D62", "#8DA0CB", "#E78AC3" ,"#A6D854")
    pie.values.list<-list()
    pie.values<-list()
    #igraph
    size.of.vertex<-as.data.frame(stats::aggregate(barcode.history~seq.number,history,length,na.action = stats::na.pass))
    #NA count as 1
    size.of.vertex1<-as.data.frame(stats::aggregate(barcode.history~seq.number,history,length))
    size.of.vertex<-merge(size.of.vertex1,size.of.vertex,all=T, by="seq.number")[,-3]
    size.of.vertex[!(size.of.vertex$seq.number %in% size.of.vertex1$seq.number),2]<-0
    #NA count as 0
    colnames(size.of.vertex)<-c("seq.number","size")
    rm(size.of.vertex1)

    #isotype distribution list
    history$isotype[is.na(history$barcode.history)]<-9999
    history$trans_state_his[is.na(history$barcode.history)]<-9999

    isotype.distribution<-reshape2::dcast(history, seq.number~isotype, value.var ="isotype",length)[,-1]
    if(length(which(colnames(isotype.distribution)==9999))!=0){
      isotype.distribution<-isotype.distribution[,1:ncol(isotype.distribution)-1]
    }
    isotype.name<-colnames(isotype.distribution)
    isotype.distribution <-as.list( as.data.frame(t(isotype.distribution)))
    #transcriptome state distribution list
    trans_state_distribution<-reshape2::dcast(history, seq.number~trans_state_his, value.var ="trans_state_his",length)[,-1]
    if(length(which(colnames(trans_state_distribution)==9999))!=0){
      trans_state_distribution<-trans_state_distribution[,1:ncol(trans_state_distribution)-1]
    }
    trans_state_name<-colnames(trans_state_distribution)
    trans_state_distribution <-as.list( as.data.frame(t(trans_state_distribution)))

    #write igraph list and assign attr
    for (i in 1:length(igraph.index)){
      if (length(igraph.index[[i]])>1){
        #igraph.index[[i]]<-c(0,igraph.index[[i]])
        igraph.index.attr[[i]]<-data.frame(igraph.index[[i]])
        colnames(igraph.index.attr[[i]])<-"seq.number"

        igraph.index.jr[[i]]<-data.frame(igraph.index[[i]])
        colnames(igraph.index.jr[[i]])<-"seq.number"

        #delete duplicated for vertex attribute
        igraph.index.attr[[i]]<-subset(igraph.index.attr[[i]],!duplicated(igraph.index.attr[[i]]$seq.number))
        #size of vertex
        igraph.index.attr[[i]]<-merge(size.of.vertex,igraph.index.attr[[i]],by="seq.number",all.y=T)
        igraph.index.attr[[i]]$size[1]<-0
        igraph.index.attr[[i]]$adj_size<-(igraph.index.attr[[i]]$size*60/(range(igraph.index.attr[[i]]$size)[2]-range(igraph.index.attr[[i]]$size)[1]))^0.5+15 #visual size of vertex a number between 20 and 60
        #size of empty
        igraph.index.attr[[i]]$adj_size<-replace(igraph.index.attr[[i]]$adj_size, igraph.index.attr[[i]]$size==0, 10)

        #simplified index
        igraph.index.jr[[i]]$No<-match(x=igraph.index.jr[[i]]$seq.number,table=igraph.index.attr[[i]]$seq.number)

        #find empty
        size0<-which(igraph.index.attr[[i]]$size==0)

        No<-igraph.index.jr[[i]]$No
        #option: no empty node

        No<-.RM.EMPTY.NODE(No,size0[-1])
        if(length(size0[-1])>0){
          igraph.index.attr[[i]]<-igraph.index.attr[[i]][-size0[-1],]
        }

        rownames(igraph.index.attr[[i]])<-c(1:nrow(igraph.index.attr[[i]]))

        #match isotype.distribution to each vertex
        pie.values<-list()
        for(j in 2:length(igraph.index.attr[[i]]$seq.number)){
          pie.values[[j]]<-isotype.distribution[[igraph.index.attr[[i]]$seq.number[j]]]
        }
        pie.values.list[[i]]<-pie.values

        #match trans state distribution to each vertex
        pie_values_trans<-list()
        for(j in 2:length(igraph.index.attr[[i]]$seq.number)){
          pie_values_trans[[j]]<-trans_state_distribution[[igraph.index.attr[[i]]$seq.number[j]]]
        }
        pie_values_trans_list[[i]]<-pie_values_trans

        #write graph list
        g<-igraph::graph(No,directed = T)
        g$layout<-igraph::layout_as_tree
        igraph::V(g)$label<-igraph.index.attr[[i]]$size
        igraph::V(g)$size<-igraph.index.attr[[i]]$adj_size
        igraph::V(g)$label.dist<-3

        igraph::V(g)$shape<-"pie"
        igraph::V(g)$shape[1]<-"circle"
        igraph::V(g)$pie.color<-list(colors)
        igraph::V(g)$color[1]<-"black"
        p<-g
        ###!!!
        igraph::V(g)$pie<-pie.values.list[[i]]
        igraph::V(p)$pie<-pie_values_trans_list[[i]]

        igraph_list[[i]]<-g
        igraph_list_trans[[i]]<-p
      }

      if (length(igraph.index[[i]])==1){
        igraph.index.attr[[i]]<-size.of.vertex$size[which(size.of.vertex$seq.number==igraph.index[[i]])]#size
        igraph.index.jr[[i]]<-data.frame(igraph.index[[i]])
        pie.values.list[[i]]<-list(isotype.distribution[igraph.index[[i]]][[1]])
        pie_values_trans_list[[i]]<-list(trans_state_distribution[igraph.index[[i]]][[1]])

        g<-igraph::graph(c(1,1), edges = NULL)
        g$layout<-igraph::layout_as_tree
        igraph::V(g)$size<-igraph.index.attr[[i]]/30+20
        igraph::V(g)$label<-igraph.index.attr[[i]]
        igraph::V(g)$label.dist<-5
        igraph::V(g)$shape<-"pie"
        igraph::V(g)$pie.color<-list(colors)
        p<-g
        igraph::V(g)$pie<-pie.values.list[[i]]
        igraph::V(p)$pie<-pie_values_trans_list[[i]]

        igraph_list[[i]]<-g
        igraph_list_trans[[i]]<-p
      }
    }
    output.list<-list(igraph_list,igraph_list_trans)
    names(output.list)<-c("igraph_list_iso","igraph_list_trans")
    return(output.list)
}
Any scripts or data that you put into this service are public.
Platypus documentation built on Aug. 15, 2022, 9:08 a.m.
rdrr.io home R language documentation Run R code online
CRAN packages Bioconductor packages R-Forge packages GitHub packages
Note that we can't provide technical support on individual packages. You should contact the package authors for that.
Platypus
Single-Cell Immune Repertoire and Gene Expression Analysis

R/Echidna_other_functions.R
In Platypus: Single-Cell Immune Repertoire and Gene Expression Analysis

Try the Platypus package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

Platypus Single-Cell Immune Repertoire and Gene Expression Analysis

R/Echidna_other_functions.R In Platypus: Single-Cell Immune Repertoire and Gene Expression Analysis

Try the Platypus package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

Platypus
Single-Cell Immune Repertoire and Gene Expression Analysis

R/Echidna_other_functions.R
In Platypus: Single-Cell Immune Repertoire and Gene Expression Analysis