R/utls2.R

Defines functions merge.ps line.across.facets.network2 ps.group.mean remove.zero line.across.facets.network line.across.facets.network0 add.id.facet rowCV rowSD scale_omics tax_glom_wt.3 tax_glom_wt.upper out.ps.data changeSamplenames cg.Rownm.ps rm.low.area remove_decimal merge_amp.dat random.add.ps change.rank.name subset_taxa.wt2 subset_taxa.wt subset_samples.wt change.OTU.name.ps remove_rankID make_igraph mergePs_Top_micro selectlayout filter_OTU_ps2 filter_OTU_ps scale_micro

Documented in filter_OTU_ps filter_OTU_ps2 scale_micro

#' scale_microbiome data
#'
#' @title scaling microbiome data
#' @description will use many method for microbiome data scaling
#' @param ps phyloseq abject containg microbiome data
#' @param  method could be selected by rela, sampling, log,TMM,RLE,upperquartile
#'  et al
#' @examples
#' scale_micro(ps = ps,method = "rela")# rela, sampling, log,TMM,RLE,upperquartile

scale_micro <- function(ps,
                        method = "rela"
                        ){
  if (method == "rela") {
    ps1  = phyloseq::transform_sample_counts(ps, function(x) x / sum(x,na.rm =TRUE) )

  }

  if (method == "sampling" ) {
    total = mean(sample_sums(ps));total
    standf = function(x,t = total)round(t*(x/sum(x)))
    ps1 = phyloseq::transform_sample_counts(ps,standf)
  }
  if (method == "log") {
    ps1  = phyloseq::transform_sample_counts(ps, function(x) log(1 + x) )

  }

  if (method %in% c("TMM","RLE","upperquartile")) {
    count = t(vegan_otu(ps))
    map <- as.data.frame(phyloseq::sample_data(ps))
    map$Group <- as.factor(map$Group)
    # create DGE list
    d = edgeR::DGEList(counts=count,group = map$Group)
    d$samples
    d = edgeR::calcNormFactors(d,method=method)
    # 生成实验设计矩阵
    design.mat = model.matrix(~ 0 + d$samples$group)
    colnames(design.mat)=levels(map$Group)
    d2 = edgeR::estimateGLMCommonDisp(d, design.mat)
    d2 = edgeR::estimateGLMTagwiseDisp(d2, design.mat)
    fit = edgeR::glmFit(d2, design.mat)

    otu = as.matrix(fit$fitted.values)
    head(otu)

    ps1 <- phyloseq::phyloseq(phyloseq::otu_table(as.matrix(otu),taxa_are_rows = TRUE),
                              phyloseq::tax_table(ps),
                              phyloseq::sample_data(ps)
    )
  }


  return(ps1)
}


#' filter_microbiome data
#'
#' @title filter microbiome data
#' @description filter microbiome data
#' @param ps phyloseq abject containg microbiome data
#' @param  Top number of top abundance tax or OTU
#' @examples
#' data(ps)
#' ps_sub = filter_OTU_ps(ps = ps,Top = 100)

filter_OTU_ps <- function(ps = ps,Top = NULL

){
  if (!is.null(Top)&Top != 0) {
    #
    ps_rela  = phyloseq::transform_sample_counts(ps, function(x) x / sum(x) )
    # otu table
    otu_table = as.data.frame(t(vegan_otu(ps_rela)))
    otu_table$mean = rowMeans(otu_table)
    otu_table$ID = row.names(otu_table)

    otu_table<- dplyr::arrange(otu_table, desc(mean))
    subtab = head(otu_table,Top)
    otu_table2 = as.data.frame(t(vegan_otu(ps)))
    phyloseq::otu_table(ps) = phyloseq::otu_table(as.matrix(otu_table2[subtab$ID,]),taxa_are_rows = TRUE)
  } else if(Top == 0){
    ps = ps
  }

  return(ps)
}


#' filter_microbiome data upper
#'
#' @title filter microbiome data
#' @description filter microbiome data
#' @param ps phyloseq abject containg microbiome data
#' @param  filter number or percent of tax or OTU
#' @examples
#' data(ps)
# ps_sub = filter_OTU_ps2(ps = ps,filter = 100)
# # library(ggClusterNet

filter_OTU_ps2 <- function(ps = ps,filter = NULL
){
  if (!is.null(filter)&filter != 0& filter > 1) {
    ps_rela  = phyloseq::transform_sample_counts(ps, function(x) x / sum(x) )
    # otu table
    otu_table = as.data.frame(t(vegan_otu(ps_rela)))
    otu_table$mean = rowMeans(otu_table)
    otu_table$ID = row.names(otu_table)

    otu_table<- dplyr::arrange(otu_table, desc(mean))
    subtab = head(otu_table,filter)
    otu_table2 = as.data.frame(t(vegan_otu(ps)))
    phyloseq::otu_table(ps) = phyloseq::otu_table(as.matrix(otu_table2[subtab$ID,]),taxa_are_rows = TRUE)
  } else if(filter == 0){
    ps = ps
  } else if (filter>0 & filter < 1){
    tem = ps %>%
      phyloseq::transform_sample_counts(function(x) x / sum(x) ) %>%
      filter_taxa(function(x) sum(x ) > filter , TRUE) %>%
      vegan_otu() %>%
      t()
    phyloseq::otu_table(ps) =
      phyloseq::otu_table(as.matrix(otu_table2[row.names(tem),]),taxa_are_rows = TRUE)

  }

  return(ps)
}

selectlayout <- function(m,layout = "fruchtermanreingold"){
  if (layout == "fruchtermanreingold") {
    plotcord <- data.frame(sna::gplot.layout.fruchtermanreingold(m, NULL))

  }

  if (layout == "circle") {
    plotcord <-  data.frame(sna::gplot.layout.circle(m, NULL))
  }

  if (layout == "kamadakawai") {
    plotcord <-  data.frame(sna::gplot.layout.adj(m, NULL))
  }
  if (layout == "adj") {
    plotcord <-  data.frame(sna::gplot.layout.kamadakawai(m, NULL))
  }
  if (layout == "circrand") {
    plotcord <-  data.frame(sna::gplot.layout.circrand(m, NULL))
  }
  if (layout == "eigen") {
    plotcord <-  data.frame(sna::gplot.layout.eigen(m, NULL))
  }
  if (layout == "geodist") {
    plotcord <-  data.frame(sna::gplot.layout.geodist(m, NULL))
  }
  if (layout == "hall") {
    plotcord <-  data.frame(sna::gplot.layout.hall(m, NULL))
  }

  if (layout == "mds") {
    plotcord <-  data.frame(sna::gplot.layout.mds(m, NULL))
  }

  if (layout == "princoord") {
    plotcord <-  data.frame(sna::gplot.layout.princoord(m, NULL))
  }
  if (layout == "random") {
    plotcord <-  data.frame(sna::gplot.layout.random(m, NULL))
  }
  if (layout == "rmds") {
    plotcord <-  data.frame(sna::gplot.layout.rmds(m, NULL))
  }
  if (layout == "segeo") {
    plotcord <-  data.frame(sna::gplot.layout.segeo(m, NULL))
  }
  if (layout == "seham") {
    plotcord <-  data.frame(sna::gplot.layout.seham(m, NULL))
  }
  if (layout == "spring") {
    plotcord <-  data.frame(sna::gplot.layout.spring(m, NULL))
  }
  if (layout == "springrepulse") {
    plotcord <-  data.frame(sna::gplot.layout.springrepulse(m, NULL))
  }
  if (layout == "target") {
    plotcord <-  data.frame(sna::gplot.layout.target(m, NULL))
  }

  return(plotcord)
}




mergePs_Top_micro <- function(psm = psP,
                              Top = 10,
                              j = NULL){

  res <- psm %>%
    ggClusterNet::tax_glom_wt(ranks = j)

  otu = otu_table(res)
  tax = tax_table(res)

  if (is.null(j)) {
    j = ncol(tax)
  }

  for (i in 1:dim(tax)[1]) {
    if (row.names(tax)[i] %in% names(sort(rowSums(otu), decreasing = TRUE)[1:Top])) {

      tax[i,j] =tax[i,j]
    } else {
      tax[i,j]= "others"
    }
  }
  phyloseq::tax_table(res)= tax
  res2 <- res %>%
    ggClusterNet::tax_glom_wt(ranks = j)


  return(res2)
}

make_igraph = function(cor){
  corr <- cor
  # Construct Edge File
  edges <- data.frame(from = rep(row.names(corr), ncol(corr)),
                      to = rep(colnames(corr), each = nrow(corr)),
                      r = as.vector(corr)
  )
  # Extract half of the matrix, and remove the diagonal correlation (self related to yourself)
  edges <- dplyr::filter(edges, as.vector(lower.tri(corr)))
  colnames(edges)[3] = "weight"

  edges = edges %>% dplyr::filter(weight != 0)
  #---Set the sign of the edge
  # E.color <- edges$weight
  edges$direction <- ifelse(edges$weight>0, "pp",ifelse(edges$weight<0, "np","ns"))
  node = data.frame(name = unique(c(as.character(edges$from),as.character( edges$to))))
  row.names(node) = node$name
  # Output igraph object
  igraph  = igraph::graph_from_data_frame(edges, directed = FALSE, vertices = node)
  return(igraph)
}

# library(ggClusterNet)
# library(phyloseq)
# library(tidyverse)
# data(ps)
#
# tax = ps %>% vegan_tax() %>%
#   as.data.frame()
# head(tax)
#
# tax = remove_rankID(tax)
# tax_table(ps) = tax


remove_rankID = function(taxtab){
  taxtab$Kingdom = gsub("d__","",taxtab$Kingdom)
  taxtab$Kingdom = gsub("k__","",taxtab$Kingdom)
  taxtab$Phylum = gsub("p__","",taxtab$Phylum)
  taxtab$Class = gsub("c__","",taxtab$Class)
  taxtab$Order = gsub("o__","",taxtab$Order)
  taxtab$Family = gsub("f__","",taxtab$Family)
  taxtab$Genus = gsub("g__","",taxtab$Genus)
  taxtab$Species = gsub("s__","",taxtab$Species)
  return(taxtab)
}

# ps0 = change.OTU.name.ps(ps0)
change.OTU.name.ps = function(ps0 = ps){
  otu = ps0 %>% vegan_otu() %>% t() %>%
    as.data.frame()
  head(otu)
  rep = row.names(otu)
  row.names(otu) = paste("ASV_",1:nrow(otu),sep = "")

  tax = ps0 %>% vegan_tax() %>%
    as.data.frame()
  row.names(tax) = paste("ASV_",1:nrow(tax),sep = "")
  head(tax)
  library(Biostrings)

  tree = phy_tree(ps0)
  match(tree$tip.label,rep)

  tree$tip.label =  paste("ASV_",1:length(tree$tip.label),sep = "")

  ps = phyloseq(
    otu_table(as.matrix(otu),taxa_are_rows =TRUE),
    tax_table(as.matrix(tax)),
    sample_data(ps0),
    phy_tree(tree)
  )
  return(ps)
}




# library(ggClusterNet)
# data(ps)
# subset_samples.wt( ps = ps, Group = "Group",id = c("KO"))

subset_samples.wt = function(
    ps,
    Group,
    id,
    opst = F
){

  map = phyloseq::sample_data(ps)
  tem = map[,Group][[1]]
  tem2 = match(map[,Group][[1]],id) == 1
  if (opst == FALSE) {
    map1 = map[!is.na(tem2),]
  } else if(opst ==TRUE) {
    map1 = map[is.na(tem2),]
  }

  sample_data(ps) = map1
  return(ps)
}

# subset_taxa.wt(ps = ps,rank = "OTU",id = "ASV_1")

subset_taxa.wt = function(
    ps,
    rank,
    id,
    opst = F
){


  if (is.null(ps@tax_table)) {
    otu = ps %>% vegan_otu()
    tax = data.frame(row.names = colnames(otu),ID = colnames(otu),
                     group = colnames(otu))
    tax_table(ps) = tax_table(as.matrix(tax))

    }




  if (opst == FALSE) {



    if (rank %in% colnames(phyloseq::tax_table(ps))) {

      tax = ps %>%
        ggClusterNet::vegan_tax() %>%
        as.data.frame()

      tem = tax[,rank]
      tem2 = match(tax[,rank],id) == 1
      tax1 = tax[!is.na(tem2),]
      phyloseq::tax_table(ps) = phyloseq::tax_table(as.matrix(tax1))
    } else if(rank %in% c("ASV","OTU","Zotu","ZOTU")){
      otu = ps %>% vegan_otu() %>% t() %>% as.data.frame()
      otu = otu[id,]
      otu1 = otu[row.names(otu) != "NA",]
      phyloseq::otu_table(ps) =  phyloseq::otu_table(as.matrix(otu1),taxa_are_rows = TRUE)
    } else {
      print("No action")
    }
  } else if(opst ==TRUE) {
    # opst
    if (rank %in% colnames(phyloseq::tax_table(ps))) {

      tax = ps %>%
        ggClusterNet::vegan_tax() %>%
        as.data.frame()

      tem = tax[,rank]
      tem2 = match(tax[,rank],id) == 1
      tax1 = tax[is.na(tem2),]
      phyloseq::tax_table(ps) = phyloseq::tax_table(as.matrix(tax1))
    } else if(rank %in% c("ASV","OTU","Zotu","ZOTU")){
      otu = ps %>% vegan_otu() %>% t() %>% as.data.frame()
      # otu = otu[id,]
      # otu1 = otu[row.names(otu) != "NA",]
      otu1 = otu[!row.names(otu) %in% id,]
      phyloseq::otu_table(ps) =  phyloseq::otu_table(as.matrix(otu1),taxa_are_rows = TRUE)
    } else {
      print("No action")
    }

  }

  return(ps)
}



subset_taxa.wt2 = function(
    ps,
    rank,
    id,
    opst = F
){



  if (opst == FALSE) {

    if (is.null(ps@tax_table)) {
      otu = ps %>%
        ggClusterNet::vegan_otu() %>% t() %>%
        as.data.frame()
      tax = data.frame(row.names = row.names(otu),ID = row.names(otu) )
      tax_table(ps) = as.matrix(tax)
    }


    if (rank %in% colnames(phyloseq::tax_table(ps))) {

      tax = ps %>%
        ggClusterNet::vegan_tax() %>%
        as.data.frame()

      tem = tax[,rank]
      tem2 = match(tax[,rank],id) == 1
      tax1 = tax[!is.na(tem2),]
      phyloseq::tax_table(ps) = phyloseq::tax_table(as.matrix(tax1))
    } else if(rank %in% c("ASV","OTU","Zotu","ZOTU")){
      otu = ps %>% vegan_otu() %>% t() %>% as.data.frame()
      otu = otu[id,]
      otu1 = otu[row.names(otu) != "NA",]
      phyloseq::otu_table(ps) =  phyloseq::otu_table(as.matrix(otu1),taxa_are_rows = TRUE)
    } else {
      print("No action")
    }
  } else if(opst ==TRUE) {
    # opst
    if (rank %in% colnames(phyloseq::tax_table(ps))) {

      tax = ps %>%
        ggClusterNet::vegan_tax() %>%
        as.data.frame()

      tem = tax[,rank]
      tem2 = match(tax[,rank],id) == 1
      tax1 = tax[is.na(tem2),]
      phyloseq::tax_table(ps) = phyloseq::tax_table(as.matrix(tax1))
    } else if(rank %in% c("ASV","OTU","Zotu","ZOTU")){
      otu = ps %>% vegan_otu() %>% t() %>% as.data.frame()
      # otu = otu[id,]
      # otu1 = otu[row.names(otu) != "NA",]
      otu1 = otu[!row.names(otu) %in% id,]
      phyloseq::otu_table(ps) =  phyloseq::otu_table(as.matrix(otu1),taxa_are_rows = TRUE)
    } else {
      print("No action")
    }

  }

  return(ps)
}



change.rank.name = function(ps){
  tax = ps %>% vegan_tax() %>%
    as.data.frame()
  colnames(tax) =  c("Kingdom","Phylum","Class","Order","Family","Genus","Species")
  tax_table(ps) = as.matrix(tax)
  return(ps)
}


# ps.f = random.add.ps(ps = ps)

random.add.ps = function(ps = ps,add= 6,zoom = 0.3,
                         addid = "_add"
                         ){
  map = sample_data(ps)
  id = map$Group %>% unique() %>% as.character()
  A = c()
  for (i in 1:length(id)){
    ps.t = ps %>% subset_samples.wt("Group",id[i]) %>%
      filter_taxa(function(x) sum(x ) > 0 , TRUE)
    tab = phyloseq::sample_sums(ps.t) %>% mean() %>% round()
    otu  = phyloseq::rarefy_even_depth(ps.t,sample.size = tab * zoom) %>%
      filter_taxa(function(x) sum(x ) > 0 , TRUE) %>% vegan_otu() %>% t() %>%
      as.data.frame()

    head(otu)

    if (dim(otu)[2] >= add&add != 1) {
    tab.d = otu[,1:add] %>% rownames_to_column("ID")
    } else if (add == 1){
      tab.d = otu %>% rownames_to_column("ID") %>%.[,1:(add+1)]

    }

    A[1:add] = id[i]

    if (i == 1) {
      dat = tab.d
      B = A
    } else{
      dat = dat %>% full_join(tab.d,by = "ID")
      B = c(B,A)
    }

    dat2 = dat %>%
      column_to_rownames("ID")
  }

  colnames(dat2) = paste(colnames(dat2),addid,sep = "")

  mapadd = data.frame(
    row.names = colnames(dat2),
    ID = colnames(dat2),
    Group = B
  )

  head(mapadd)
  head(dat2)

  ps.f = phyloseq(
    otu_table(as.matrix(dat2),taxa_are_rows = TRUE),
    sample_data(mapadd),
    tax_table(ps)
  )
  return(ps.f)
}


#--合并ps对象
# psf = merge_amp.dat(ps.a = ps,
#                     ps.m = ps.f,
#                     rank = "OTU")

merge_amp.dat = function(
    ps.a = ps,
    ps.m = ps.f,
    method = "rela",
    rank = "OTU"
){
  #--merge
  otu1 = ps.a %>% vegan_otu() %>% t() %>%
    as.data.frame() %>% rownames_to_column("id")
  head(otu1)

  otu2 = ps.m %>% vegan_otu() %>% t() %>%
    as.data.frame()%>% rownames_to_column("id")
  head(otu2)

  otu3 = otu1 %>% inner_join(otu2,by = "id") %>% column_to_rownames("id")
  head(otu3)

  map1 = ps.a %>% sample_data() %>% as.tibble() %>%
    select(ID,Group) %>% as.data.frame()
  map2 = ps.m%>% sample_data() %>% as.tibble() %>%
    select(ID,Group) %>% as.data.frame()
  map3 = rbind(map1,map2)
  row.names(map3) = map3$ID
  tax3 = ps.a %>% vegan_tax() %>% as.data.frame()

  ps3 = phyloseq::phyloseq(
    otu_table(as.matrix(as.matrix(otu3)),taxa_are_rows = TRUE) ,
    tax_table(as.matrix(tax3)),
    sample_data(map3)
  )
  return(ps3)
}

#--基于phylsoeq对象对读数数据取整
remove_decimal = function(ps = ps){
  re = ps %>% vegan_otu()
  # t()
  #-保留小数点位数达到取整目的
  for (i in 1:dim(re)[2]) {
    re[,i] = round(re[,i],0)
  }
  #--将空缺值填充为0
  re[is.na(re)] <- 0
  otu_table(ps) = otu_table(as.matrix(t(re)),taxa_are_rows = TRUE)
  return(ps)
}


# ps = readRDS("./ps_GC.rds")
# ps1 = rm.low.area(ps = ps,threshold = 10000)
# 用于将低于某一个阈值的丰度修改为0,代谢组开发
rm.low.area = function(
    ps = ps,
    threshold = 10000
){
  dat = ps %>% vegan_otu() %>% t()
  dim(dat)
  num = dat[dat < threshold] %>% length()
  print(num < dim(dat)[1]*dim(dat)[2]/5)
  dat[dat < threshold] = 0
  # row.names(dat)
  otu_table(ps) = otu_table(as.matrix(dat),taxa_are_rows = TRUE)
  return(ps)
}

#--ps对象更换row.names
cg.Rownm.ps = function(ps,id = NULL){

  if (is.null(id)) {
    psfin = ps
  } else{
    otu = ps %>% vegan_otu() %>% t() %>%
      as.data.frame()

    head(otu)

    tax = ps %>% vegan_tax() %>%
      as.data.frame()

    head(tax)
    tax$old.rname = row.names(tax)
    row.names(tax) = tax[[id]]
    row.names(otu) = tax[[id]]

    psfin = phyloseq(
      otu_table(as.matrix(otu),taxa_are_rows = TRUE),
      tax_table(as.matrix(tax)),
      sample_data(ps)
    )
  }

  return(psfin)
}

# ps01 = changeSamplenames(ps = ps0,newid = "newid" )
# sample_data(ps01)
changeSamplenames = function(
    ps = ps0,
    newid = "newid"  #in sample data ,was one of the colname
){
  if (!is.null(ps@otu_table)) {
    otu = ps %>% vegan_otu() %>% t() %>%
      as.data.frame()
  }

  if (!is.null(ps@sam_data)) {
    map = ps %>% sample_data() %>% as.tibble() %>%column_to_rownames(newid ) %>%
      as.data.frame()
  }

  colnames(otu) = row.names(map)
  if (!is.null(ps@tax_table)) {
    psout = phyloseq(sample_data(map),
                     otu_table(as.matrix(otu), taxa_are_rows=TRUE),
                     tax_table(ps)
    )
  } else{
    psout = phyloseq(sample_data(map),
                     otu_table(as.matrix(otu), taxa_are_rows=TRUE))
  }
  return(psout)
}

# out.ps.data(ps = ps01,path = outpath,mark = "16S")
# out.ps.data(ps = psFun2,path = outpath,mark = "ITS")
# out.ps.data(ps = psF2,path = outpath,mark = "function.k")
# out.ps.data(ps = psG,path = outpath,mark = "compounds")
out.ps.data = function(ps = ps,
                       path = "./",
                       mark = "16S"
){
  if (!is.null(ps@otu_table)) {
    otu = ps %>% vegan_otu() %>% t() %>% as.data.frame() %>% rownames_to_column("ID.orig")
    write_delim(otu,paste0(path,mark,"otu.txt"))}

  if (!is.null(ps@tax_table)) {
    tax = ps %>% vegan_tax() %>% as.data.frame() %>% rownames_to_column("ID.orig")
    write_delim(tax,paste0(path,mark,"tax.txt"))
  }

  if (!is.null(ps@sam_data)) {
    map = ps %>% sample_data() %>% as.data.frame() %>%
      rownames_to_column("ID.seq") %>% as.tibble()
    write_delim(map,paste0(path,mark,"map.txt"))
  }
}


# tax_glom_wt.upper(ps = psG,ranks = "Metabolite")
tax_glom_wt.upper <- function(ps = psG,ranks = "Metabolite") {


  if (  is.numeric(ranks)) {
    ranks <- phyloseq::rank.names(ps)[ranks]
  }
  tax = ps %>% vegan_tax() %>% as.data.frame() %>%
    mutate(conb =!!sym(ranks) )
  head(tax)
  tax_table(ps) = tax_table(as.matrix(tax))

  ranks= "conb"

  otu <- as.data.frame(t(vegan_otu(ps)))
  tax <- as.data.frame(vegan_tax(ps))

  # building group
  tax[[ranks]][is.na(tax[[ranks]])] = "Unknown"
  tax[[ranks]][tax[[ranks]] == ""] = "Unknown"
  tax[[ranks]][tax[[ranks]] == "NA"] = "Unknown"
  split <- split(otu,tax[[ranks]])
  #calculate sum by group
  apply <- lapply(split,function(x)colSums(x[]))
  # result chack
  otucon <- do.call(rbind,apply)

  taxcon <- tax[1:match(ranks,colnames(tax))]
  taxcon <- taxcon[!duplicated(tax[[ranks]]),]

  if (is.vector(taxcon)) {
    taxcon = data.frame(row.names = taxcon,ranks = taxcon)
    colnames(taxcon) = ranks
  }

  #-tax name with NA wound be repeated with unknown
  taxcon[[ranks]][is.na(taxcon[[ranks]])] = "unknown"
  row.names(taxcon) <- taxcon[[ranks]]


  row.names(taxcon) <- gsub("-","_",row.names(taxcon))
  row.names(taxcon) <- gsub("[/]","_",row.names(taxcon))
  row.names(taxcon) <- gsub("[(]","_",row.names(taxcon))
  row.names(taxcon) <- gsub("[)]","_",row.names(taxcon))
  row.names(taxcon) <- gsub("[:]","_",row.names(taxcon))
  row.names(taxcon) <- gsub("[[]","",row.names(taxcon))
  row.names(taxcon) <- gsub("[]]","_",row.names(taxcon))
  row.names(taxcon) <- gsub("[#]","_",row.names(taxcon))
  row.names(taxcon) <- gsub("[+]","_",row.names(taxcon))
  # row.names(taxcon) <- gsub("[ ]","_",row.names(taxcon))
  row.names(taxcon) <- gsub("[']","_",row.names(taxcon))
  # row.names(taxcon) <- gsub("[']","_",row.names(taxcon))


  row.names(otucon) <- gsub("-","_",row.names(otucon))
  row.names(otucon) <- gsub("[/]","_",row.names(otucon))
  row.names(otucon) <- gsub("[(]","_",row.names(otucon))
  row.names(otucon) <- gsub("[)]","_",row.names(otucon))
  row.names(otucon) <- gsub("[:]","_",row.names(otucon))
  row.names(otucon) <- gsub("[[]","",row.names(otucon))
  row.names(otucon) <- gsub("[]]","_",row.names(otucon))
  row.names(otucon) <- gsub("[#]","_",row.names(otucon))
  row.names(otucon) <- gsub("[+]","_",row.names(otucon))
  # row.names(otucon) <- gsub(" ","_",row.names(otucon))
  row.names(otucon) <- gsub("[']","_",row.names(otucon))


  pscon <- phyloseq::phyloseq(
    phyloseq::otu_table( as.matrix(otucon),taxa_are_rows = TRUE),
    phyloseq::tax_table(as.matrix(taxcon)),
    phyloseq::sample_data(ps)
  )
  return(pscon)
}



tax_glom_wt.3 <- function(ps = ps,ranks = "Phylum") {




  if (  is.numeric(ranks)) {
    ranks <- phyloseq::rank.names(ps)[ranks]
  }

  tax = ps %>% vegan_tax() %>% as.data.frame() %>%
    mutate(conb =!!sym(ranks) )
  head(tax)
  tax_table(ps) = tax_table(as.matrix(tax))

  ranks= "conb"

  otu <- as.data.frame(t(vegan_otu(ps)))
  tax <- as.data.frame(vegan_tax(ps))

  # building group
  tax[[ranks]][is.na(tax[[ranks]])] = "Unknown"
  tax[[ranks]][tax[[ranks]] == ""] = "Unknown"
  tax[[ranks]][tax[[ranks]] == "NA"] = "Unknown"
  split <- split(otu,tax[[ranks]])
  #calculate sum by group
  apply <- lapply(split,function(x)colSums(x[]))
  # result chack
  otucon <- do.call(rbind,apply)

  taxcon <- tax[1:match(ranks,colnames(tax))]
  taxcon <- taxcon[!duplicated(tax[[ranks]]),]

  if (is.vector(taxcon)) {
    taxcon = data.frame(row.names = taxcon,ranks = taxcon)
    colnames(taxcon) = ranks
  }

  #-tax name with NA wound be repeated with unknown
  taxcon[[ranks]][is.na(taxcon[[ranks]])] = "unknown"
  row.names(taxcon) <- taxcon[[ranks]]


  pscon <- phyloseq::phyloseq(
    phyloseq::otu_table( as.matrix(otucon),taxa_are_rows = TRUE),
    phyloseq::tax_table(as.matrix(taxcon)),
    phyloseq::sample_data(ps)
  )
  return(pscon)
}


# library(ggClusterNet)
# library(tidyverse)
# library(phyloseq)

scale_omics = function(
    ps = ps,
    method = "TPM",
    efflength =  1000

){

  otu = ps %>% vegan_otu()
  if (method == "TPM") {
    efflength =  efflength
  }

  if (method == "TPM") {
    counts2TPM <- function(count=count, efflength=  efflength){
      RPK <- count/(efflength/1000)   #每千碱基reads (Reads Per Kilobase) 长度标准化
      PMSC_rpk <- sum(RPK)/1e6        #RPK的每百万缩放因子 (“per million” scaling factor ) 深度标准化
      RPK/PMSC_rpk
    }

    tpm <- as.data.frame(apply(otu,2,counts2TPM) %>% t())
    otu_table(ps) = otu_table(as.matrix(tpm),taxa_are_rows = TRUE)
  }

  if (method == "FPKM") {
    expr1 = otu/efflength
    fpkm = t(t(expr1)/colSums(otu)) * 10^9
    otu_table(ps) = otu_table(as.matrix(fpkm),taxa_are_rows = TRUE)
  }
  if (method == "RPKM") {
    RPKM = function(count=count, efflength=  efflength){
      lib.size = sum(count)
      rpm = (count*10^9)/lib.size
      rpkm = rpm/efflength
      return(rpkm)
    }

    rpkm = RPKM(count= otu, efflength=  efflength) %>% t()

    otu_table(ps) = otu_table(as.matrix(rpkm),taxa_are_rows = TRUE)

  }

  return(ps)
}



rowSD = function(x){
  apply(x,1, sd)
}

rowCV = function(x){
  rowSD(x)/rowMeans(x)
}


add.id.facet = function(x = d,group = "id"){
  tem = x %>% distinct(!!sym(group))
  colnames(tem) = "nm.tem"
  id.s = tem$nm.tem
  for (i in 1:length(id.s)) {
    tem2 = x %>% filter(!!sym(group) == id.s[i])
    tem2$id.facet = paste0(id.s[i],"_",1:dim(tem2)[1])

    if (i == 1) {
      tem3 = tem2
    } else{
      tem3 = rbind(tem3,tem2)
    }
  }
  return(tem3)
}



line.across.facets.network0 <- function(p, from=1, to=2,
                                       from_point_id=1,
                                       to_point_id=1,
                                       plotout = F,
                                       gp=gpar(lty=2, alpha=0.5)){
  if (TRUE %in% grepl("ggplot", class(p))) {
    g <- ggplot_gtable(ggplot_build(p))
  } else {
    g <- p
  }

  # if one of them contains NA, return g
  if (NA %in% c(from, to, from_point_id, to_point_id)) return(g)

  # collect panel viewport names and index numbers in the grob
  panel_vps <- c()
  id_n <- c()
  for (i in 1:length(g$grobs)) {
    if (str_detect(g$layout[i, "name"], "panel") & g$grobs[[i]]$name != "NULL") {
      p_name <- g$layout[i, "name"]
      panel_vps <- c(panel_vps, p_name)
      id_n <- c(id_n, i)
    }
  }

  panel_vps %>%
    str_replace("panel-", "") %>%
    str_split("[\\-\\.]") %>%
    map_chr(1) -> ind_col
  ind_col <- as.numeric(ind_col)

  panel_vps %>%
    str_replace("panel-", "") %>%
    str_split("[\\-\\.]") %>%
    map_chr(2) -> ind_row
  ind_row <- as.numeric(ind_row)

  my_dim <- c(max(ind_row), max(ind_col))
  x <- 1:length(id_n)
  L <- length(x)
  # x[(L+1):(my_dim[1]*my_dim[2])] <- NA
  m1 <- as.vector(matrix(x, nrow=my_dim[1], byrow=TRUE))

  x2 <- 1:L
  xx <- as.vector(!is.na(m1))
  xx[xx] <- x2
  xx[!xx] <- NA
  m2 <- as.vector(matrix(xx, nrow=my_dim[1]))

  from <- m2[m1==from]
  from <- from[complete.cases(from)]
  to <- m2[m1==to]
  to <- to[complete.cases(to)]

  # select points to be connected
  pnames1 <- names(g$grobs[[id_n[from]]]$children)
  pnames2 <- names(g$grobs[[id_n[to]]]$children)

  pname1 <- pnames1[str_detect(pnames1, "geom_point.points")]
  pname2 <- pnames2[str_detect(pnames2, "geom_point.points")]

  p1 <- g$grobs[[id_n[from]]]$children[[pname1[1]]]
  p2 <- g$grobs[[id_n[to]]]$children[[pname2[1]]]

  g1 <- with(g$layout[id_n[from],],
             gtable_add_grob(g,
                             moveToGrob(p1$x[from_point_id],
                                        p1$y[from_point_id]),
                             t=t, l=l,name = paste0(from,to,from_point_id,to_point_id)))
  g2 <- with(g1$layout[id_n[to],],
             gtable_add_grob(g1,
                             lineToGrob(p2$x[to_point_id],
                                        p2$y[to_point_id], gp=gp),
                             t=t, l=l,name = paste0(from,to,from_point_id,to_point_id)))

  # print(p1$x[from_point_id])
  # print(p1$y[from_point_id])
  # print(p2$x[to_point_id])
  # print(p2$y[to_point_id])
  # g <- gtable_add_grob(g,
  #                      moveToGrob(p1$x[from_point_id],
  #                                 p1$y[from_point_id]),
  #                      t=id_n[from], l=id_n[from])
  #
  # g <- gtable_add_grob(g,
  #                      lineToGrob(p2$x[to_point_id],
  #                                 p2$y[to_point_id], gp=gp),
  #                      t=id_n[2], l=id_n[2])
  # tried curve, but it seems no function for curve across viewports, this is within viewport plot
  # g <- with(g$layout[id_n[to],],
  #           gtable_add_grob(g,
  #                           segmentsGrob(p1$x[from_point_id],
  #                                     p1$y[from_point_id],
  #                                     p2$x[to_point_id],
  #                                     p2$y[to_point_id], gp=gp),
  #                           t=t, l=l))


  g2$layout$clip <- "off"
  if (plotout==TRUE) grid.draw(g2)
  return(g2)
}
line.across.facets.network <- function(p, from=1, to=2,
                                       from_point_id=1,
                                       to_point_id=1,
                                       plotout = F,
                                       gp=gpar(lty=2, alpha=0.5)){
  if (TRUE %in% grepl("ggplot", class(p))) {
    g <- ggplot_gtable(ggplot_build(p))
  } else {
    g <- p
  }

  # if one of them contains NA, return g
  if (NA %in% c(from, to, from_point_id, to_point_id)) return(g)

  # collect panel viewport names and index numbers in the grob
  panel_vps <- c()
  id_n <- c()
  for (i in 1:length(g$grobs)) {
    if (str_detect(g$layout[i, "name"], "panel") & g$grobs[[i]]$name != "NULL") {
      p_name <- g$layout[i, "name"]
      panel_vps <- c(panel_vps, p_name)
      id_n <- c(id_n, i)
    }
  }

  panel_vps %>%
    str_replace("panel-", "") %>%
    str_split("[\\-\\.]") %>%
    map_chr(1) -> ind_col
  ind_col <- as.numeric(ind_col)

  panel_vps %>%
    str_replace("panel-", "") %>%
    str_split("[\\-\\.]") %>%
    map_chr(2) -> ind_row
  ind_row <- as.numeric(ind_row)

  my_dim <- c(max(ind_row), max(ind_col))
  x <- 1:length(id_n)
  L <- length(x)
  # x[(L+1):(my_dim[1]*my_dim[2])] <- NA
  m1 <- as.vector(matrix(x, nrow=my_dim[1], byrow=TRUE))

  x2 <- 1:L
  xx <- as.vector(!is.na(m1))
  xx[xx] <- x2
  xx[!xx] <- NA
  m2 <- as.vector(matrix(xx, nrow=my_dim[1]))

  from <- m2[m1==from] %>% unique()
  from <- from[complete.cases(from)]
  to <- m2[m1==to]
  to <- to[complete.cases(to)]

  # select points to be connected
  pnames1 <- names(g$grobs[[id_n[from]]]$children)
  pnames2 <- names(g$grobs[[id_n[to]]]$children)

  pname1 <- pnames1[str_detect(pnames1, "geom_point.points")]
  pname2 <- pnames2[str_detect(pnames2, "geom_point.points")]

  p1 <- g$grobs[[id_n[from]]]$children[[pname1[1]]]
  p2 <- g$grobs[[id_n[to]]]$children[[pname2[1]]]

  g1 <- with(g$layout[id_n[from],],
             gtable_add_grob(g,
                             moveToGrob(p1$x[from_point_id],
                                        p1$y[from_point_id]),
                             t=t, l=l,name = paste0(from,to,from_point_id,to_point_id)))
  g2 <- with(g1$layout[id_n[to],],
             gtable_add_grob(g1,
                             lineToGrob(p2$x[to_point_id],
                                        p2$y[to_point_id], gp=gp),
                             t=t, l=l,name = paste0(from,to,from_point_id,to_point_id)))

  # print(p1$x[from_point_id])
  # print(p1$y[from_point_id])
  # print(p2$x[to_point_id])
  # print(p2$y[to_point_id])
  # g <- gtable_add_grob(g,
  #                      moveToGrob(p1$x[from_point_id],
  #                                 p1$y[from_point_id]),
  #                      t=id_n[from], l=id_n[from])
  #
  # g <- gtable_add_grob(g,
  #                      lineToGrob(p2$x[to_point_id],
  #                                 p2$y[to_point_id], gp=gp),
  #                      t=id_n[2], l=id_n[2])
  # tried curve, but it seems no function for curve across viewports, this is within viewport plot
  # g <- with(g$layout[id_n[to],],
  #           gtable_add_grob(g,
  #                           segmentsGrob(p1$x[from_point_id],
  #                                     p1$y[from_point_id],
  #                                     p2$x[to_point_id],
  #                                     p2$y[to_point_id], gp=gp),
  #                           t=t, l=l))


  g2$layout$clip <- "off"
  if (plotout==TRUE) grid.draw(g2)
  return(g2)
}


remove.zero = function(ps) {
  pst = ps %>%
    # scale_micro() %>%
    filter_taxa(function(x) sum(x ) > 0 , TRUE)
  return(pst)
}

ps.group.mean = function(ps,group = "Group" ){
  otu_table = as.data.frame(t(vegan_otu(ps)))
  head(otu_table)

  design = as.data.frame(sample_data(ps))
  ## 计算相对丰度,计算每个物种丰度均值,按照均值排序
  OTU = as.matrix(otu_table)
  norm = t(t(OTU)/colSums(OTU,na=TRUE)) #* 100 # normalization to total 100
  norma = norm %>%
    t() %>% as.data.frame()
  #数据分组计算平均值
  iris.split <- split(norma,as.factor(design$Group))

  iris.apply <- lapply(iris.split,function(x)colMeans(x,na.rm = TRUE))
  # 组合结果
  norm2 <- do.call(rbind,iris.apply)%>%
    t()
  norm2 = as.data.frame(norm2)
  norm2$mean=apply(norm2,1,mean)
  norm2$ID = row.names(norm2)
  return(norm2)
}

line.across.facets.network2 <- function(p, from=1, to=2,
                                        from_point_id=1,
                                        to_point_id=1,
                                        plotout = F,
                                        gp=gpar(lty=2, alpha=0.5)){
  if (TRUE %in% grepl("ggplot", class(p))) {
    g <- ggplot_gtable(ggplot_build(p))
  } else {
    g <- p
  }

  # if one of them contains NA, return g
  if (NA %in% c(from, to, from_point_id, to_point_id)) return(g)

  # collect panel viewport names and index numbers in the grob
  panel_vps <- c()
  id_n <- c()
  for (i in 1:length(g$grobs)) {
    if (str_detect(g$layout[i, "name"], "panel") & g$grobs[[i]]$name != "NULL") {
      p_name <- g$layout[i, "name"]
      panel_vps <- c(panel_vps, p_name)
      id_n <- c(id_n, i)
    }
  }

  panel_vps %>%
    str_replace("panel-", "") %>%
    str_split("[\\-\\.]") %>%
    map_chr(1) -> ind_col
  ind_col <- as.numeric(ind_col)

  panel_vps %>%
    str_replace("panel-", "") %>%
    str_split("[\\-\\.]") %>%
    map_chr(2) -> ind_row
  ind_row <- as.numeric(ind_row)

  my_dim <- c(max(ind_row), max(ind_col))
  x <- 1:length(id_n)
  L <- length(x)
  # x[(L+1):(my_dim[1]*my_dim[2])] <- NA
  m1 <- as.vector(matrix(x, nrow=my_dim[1], byrow=TRUE))

  x2 <- 1:L
  xx <- as.vector(!is.na(m1))
  xx[xx] <- x2
  xx[!xx] <- NA
  m2 <- as.vector(matrix(xx, nrow=my_dim[1]))

  from <- m2[m1==from] %>% unique()
  from <- from[complete.cases(from)]
  to <- m2[m1==to]
  to <- to[complete.cases(to)]

  # select points to be connected
  pnames1 <- names(g$grobs[[id_n[from]]]$children)
  pnames2 <- names(g$grobs[[id_n[to]]]$children)

  pname1 <- pnames1[str_detect(pnames1, "geom_rect.rect")]
  pname2 <- pnames2[str_detect(pnames2, "geom_rect.rect")]

  p1 <- g$grobs[[id_n[from]]]$children[[pname1[1]]]
  p2 <- g$grobs[[id_n[to]]]$children[[pname2[1]]]

  g1 <- with(g$layout[id_n[from],],
             gtable_add_grob(g,
                             moveToGrob(p1$x[from_point_id],
                                        p1$y[from_point_id]),
                             t=t, l=l,name = paste0(from,to,from_point_id,to_point_id)))
  g2 <- with(g1$layout[id_n[to],],
             gtable_add_grob(g1,
                             lineToGrob(p2$x[to_point_id],
                                        p2$y[to_point_id], gp=gp),
                             t=t, l=l,name = paste0(from,to,from_point_id,to_point_id)))

  # print(p1$x[from_point_id])
  # print(p1$y[from_point_id])
  # print(p2$x[to_point_id])
  # print(p2$y[to_point_id])
  # g <- gtable_add_grob(g,
  #                      moveToGrob(p1$x[from_point_id],
  #                                 p1$y[from_point_id]),
  #                      t=id_n[from], l=id_n[from])
  #
  # g <- gtable_add_grob(g,
  #                      lineToGrob(p2$x[to_point_id],
  #                                 p2$y[to_point_id], gp=gp),
  #                      t=id_n[2], l=id_n[2])
  # tried curve, but it seems no function for curve across viewports, this is within viewport plot
  # g <- with(g$layout[id_n[to],],
  #           gtable_add_grob(g,
  #                           segmentsGrob(p1$x[from_point_id],
  #                                     p1$y[from_point_id],
  #                                     p2$x[to_point_id],
  #                                     p2$y[to_point_id], gp=gp),
  #                           t=t, l=l))


  g2$layout$clip <- "off"
  if (plotout==TRUE) grid.draw(g2)
  return(g2)
}



merge.ps <- function(ps1 ,
                     ps2,
                     N1 = 100,
                     N2 = 100,
                     scale = TRUE,
                     onlygroup = FALSE,#不进行列合并,只用于区分不同域
                     dat1.lab = "bac",
                     dat2.lab = "fun") {

  if (scale == TRUE) {
    if (!is.null(ps16s)) {
      ps1  = phyloseq::transform_sample_counts(ps1, function(x) x / sum(x) )
    }
    if (!is.null(psITS)) {
      ps2  = phyloseq::transform_sample_counts(ps2, function(x) x / sum(x) )
    }
  }
  if (!is.null(ps1)) {
    # ps_16s = phyloseq::filter_taxa(ps16s, function(x) mean(x) > N16s, TRUE)#select OTUs according to  relative abundance
    ps_16s  =  filter_OTU_ps(ps = ps1,Top = N1)
    ###
    otu_table_16s = as.data.frame(t(vegan_otu(ps_16s)))
    row.names(otu_table_16s) = paste(dat1.lab,row.names(otu_table_16s),sep = "_")
    ## change the OTU name of bac and fungi OTU table
    tax_table_16s = as.data.frame(vegan_tax(ps_16s))
    #-- add a col marked the bac and fungi
    if ("filed" %in%colnames(tax_table_16s)) {

    } else{
      row.names(tax_table_16s) = paste(dat1.lab,row.names(tax_table_16s),sep = "_")
      tax_table_16s$filed = rep(dat1.lab,length(row.names(tax_table_16s)))
    }

  }
  if (!is.null(ps2)) {
    # ps_ITS = phyloseq::filter_taxa(psITS, function(x) mean(x) > NITS , TRUE)#select OTUs according to  relative abundance
    ps_ITS = filter_OTU_ps(ps = ps2,Top = N2)
    otu_table_ITS = as.data.frame(t(vegan_otu(ps_ITS)))
    row.names(otu_table_ITS) = paste(dat2.lab,row.names(otu_table_ITS ),sep = "_")
    tax_table_ITS = as.data.frame(vegan_tax(ps_ITS))
    row.names(tax_table_ITS) = paste(dat2.lab,row.names(tax_table_ITS),sep = "_")
    tax_table_ITS$filed = rep(dat2.lab,length(row.names(tax_table_ITS)))

    if ("filed" %in%colnames(tax_table_ITS)) {
    } else{
      row.names(tax_table_ITS) = paste(dat2.lab,row.names(tax_table_ITS),sep = "_")
      tax_table_ITS$filed = rep(dat2.lab,length(row.names(tax_table_ITS)))
    }
  }


  if (!is.null(ps2) & !is.null(ps1) ) {
    ## merge OTU table of bac and fungi



    otu_table = rbind(otu_table_16s[,intersect(names(otu_table_ITS),names(otu_table_16s))],

                      otu_table_ITS[,intersect(names(otu_table_ITS),names(otu_table_16s))])

    if (onlygroup == FALSE) {
      tax_table = rbind(tax_table_16s,tax_table_ITS)
      dim(otu_table)
    } else if(onlygroup == TRUE){
      tax_table = data.frame(filed = c(tax_table_16s$filed,tax_table_ITS$filed),row.names = row.names(otu_table),id = row.names(otu_table))
    }
    #on of map table as final map table

    mapping = as.data.frame( phyloseq::sample_data(ps_16s))
    head(mapping)
    # mapping$Group4 = "all_sample"
    # mapping$Group4 = as.factor(mapping$Group4)
    ##merge all abject of phyloseq
    pallps <-  phyloseq::phyloseq( phyloseq::otu_table(as.matrix(otu_table),taxa_are_rows = TRUE),
                                   phyloseq::sample_data(mapping),
                                   phyloseq::tax_table(as.matrix(tax_table)))


  } else if(is.null(psITS) & !is.null(ps16s) ) {
    otu_table = rbind(otu_table_16s)

    if (onlygroup == FALSE) {
      tax_table = rbind(tax_table_16s)
      dim(otu_table)
    } else if(onlygroup == TRUE){
      tax_table = data.frame(filed = c(tax_table_16s$filed,row.names = row.names(otu_table),id = row.names(otu_table)))
    }
    #on of map table as final map table
    mapping = as.data.frame(sample_data(ps_16s))
    head(mapping)
    # mapping$Group4 = "all_sample"
    # mapping$Group4 = as.factor(mapping$Group4)
    ##merge all abject of phyloseq
    pallps <-  phyloseq::phyloseq( phyloseq::otu_table(as.matrix(otu_table),taxa_are_rows = TRUE),
                                   phyloseq::sample_data(mapping),
                                   phyloseq::tax_table(as.matrix(tax_table)))


  } else if (!is.null(ps2) & is.null(ps1)){
    otu_table = rbind(otu_table_ITS)

    if (onlygroup == FALSE) {
      tax_table = rbind(tax_table_ITS)
      dim(otu_table)
    } else if(onlygroup == TRUE){
      tax_table = data.frame(filed = c(tax_table_ITS$filed),row.names = row.names(otu_table),id = row.names(otu_table))
    }
    #on of map table as final map table
    mapping = as.data.frame( phyloseq::sample_data(psITS))
    head(mapping)
    # mapping$Group4 = "all_sample"
    # mapping$Group4 = as.factor(mapping$Group4)
    ##merge all abject of phyloseq
    pallps <-  phyloseq::phyloseq( phyloseq::otu_table(as.matrix(otu_table),taxa_are_rows = T),
                                   phyloseq::sample_data(mapping),
                                   phyloseq::tax_table(as.matrix(tax_table)))

  }

  tax = pallps %>% vegan_tax() %>%
    as.data.frame() %>% dplyr::select(filed,everything())
  phyloseq::tax_table(pallps) = as.matrix(tax)


  return(pallps)
}
taowenmicro/ggClusterNet documentation built on March 29, 2024, 1:32 a.m.