R/summary.phylofactor.R

Defines functions summary.phylofactor

Documented in summary.phylofactor

#' Summarize phylofactor objects
#' @export
#' @param PF phylofactor object
#' @param taxonomy taxonomy (optional) for taxonomic trimming. First column contains tree tip-labels, second column has semicolon-delimited taxonomic character strings.
#' @param factor Integer (optional). If NULL, will use print.phylofactor
#' @param taxon.trimming Chracter string. How to trim taxonomic groups. Must be either "sup" (group by shortest unique prefix for every taxon in group distinguishing it from taxa in complementary group), "taxon" for taxon levels in second column of \code{taxonomy}, or "species" for no taxonomic binning.
#' @param output.signal logical - whether or not to output signal. Will attempt to call model.fcn or customized choice.fcn to estimate signal (and thus relative importance) for each taxonomic group from \code{taxon.trimming}.
#' @return phylofactor.summary object containing data, group summary and, with input taxonomy, taxonomic break-down of signal
summary.phylofactor <- function(PF,taxonomy=NULL,factor=NULL,taxon.trimming='sup',output.signal=T){
  ### must output the following:
  # (0) factors[n,] description - if null, summarize whole object w/ biggest, #signif at holm cutoff, adjusted P-vals etc.
  # (1) model summary
  # (2) data for this factor
        # - PhyloFactor & PhyCA: transform.fcn --> contrast.fcn and OTUTable
        # - gpf: data frame input to model
        # - twoSample: NA
        # - ALL: fitted.values
  # (3) taxa table: (optional sup, full-taxon-binning,or species-level data)
        # - #species, taxa, mean (sorted), signal (tbd, possibly sorted)
  # (4) taxa.split
  # (5) species.lists
  if (is.null(factor)){
    print(PF)
  } else {
    if (factor>PF$nfactors){
      stop('factor input exceeds number of factors in phylofactor object')
    }
    if (is.null(taxonomy)){
      taxon.trimming <- 'species'
    } else {
      if (!all(class(taxonomy)=='data.frame')){
        taxonomy <- as.data.frame(taxonomy)
      }
      if (taxon.trimming %in% c('sup','taxon')){
        brks <- grepl('\\[',taxonomy[,2]) | grepl('\\]',taxonomy[,2])
        if (any(brks)){
          taxonomy[brks,2] <- gsub('\\[','~',taxonomy[brks,2])
          taxonomy[brks,2] <- gsub('\\]','~',taxonomy[brks,2])
        }
        curls <- grepl('\\{',taxonomy[,2]) | grepl('\\}',taxonomy[,2])
        if (any(curls)){
          taxonomy[curls,2] <- gsub('\\{','~',taxonomy[curls,2])
          taxonomy[curls,2] <- gsub('\\}','~',taxonomy[curls,2])
        }
        parens <- grepl('\\(',taxonomy[,2]) | grepl('\\)',taxonomy[,2])
        if (any(parens)){
          taxonomy[parens,2] <- gsub('\\(','~',taxonomy[parens,2])
          taxonomy[parens,2] <- gsub('\\)','~',taxonomy[parens,2])
        }
      }
    }
    if (!taxon.trimming %in% c('sup','taxon','species')){
      stop('unknown input taxon.trimming. Must be either "sup", "taxon", or "species".')
    }


    output <- NULL
    output$group.summary <- PF$factors[factor,]

    if (factor==1){
      Grps <- getPhyloGroups(PF$tree)
    } else {
      Grps <- pf.getPhyloGroups(PF,factor)
    }

    ### model.summary
    if (PF$phylofactor.fcn == 'PhyloFactor'){
      if (!is.null(PF$custom.output)){
        output$model.summary <- tryCatch(summary(PF$custom.output[[factor]]),
                 error=function(e) 'could not obtain summary for customized output')
      } else {
        output$model.summary <- summary(PF$models[[factor]])
      }
    } else if (PF$phylofactor.fcn=='gpf'){
      if (PF$algorithm!='CoefContrast'){
        output$model.summary <- summary(PF$models[[factor]])
      } else {
        if (all(PF$PartitioningVariables %in% colnames(PF$coefficient.matrix))){
          B <- t(PF$coefficient.matrix[,PF$PartitioningVariables,drop=F]/PF$coefficient.SE[,PF$PartitioningVariables,drop=F]) %*% PF$basis[,factor]
        } else {
          pvs_not_found <- PF$PartitioningVariables[!PF$PartitioningVariables %in% colnames(PF$coefficient.matrix)]
          pvs_found <- setdiff(PF$PartitioningVariables,pvs_not_found)
          ix <- sapply(pvs_not_found,FUN=function(a,b) grepl(a,b),b=colnames(PF$coefficient.matrix)) %>% apply(MARGIN=1,any)
          ix <- ix | colnames(PF$coefficient.matrix) %in% pvs_found
          if (!any(ix)){
            stop('Could neither match nor grep any PartitioningVariables in the coefficients of model. Try running stats::coefficients on your input model.fcn for a single species to determine the appropriate names for PartitioningVariables.')
          }
          B <- t(PF$coefficient.matrix[,ix,drop=F]/PF$coefficient.SE[,ix,drop=F]) %*% PF$basis[,factor]
        }
        output$model.summary <- data.frame('PartitioningVariable'=PF$PartitioningVariables,'CoefContrast'=B)
      }
    } else if (PF$phylofactor.fcn=='PhyCA'){
      output$model.summary <- data.frame('Percent_Explained_Variance'=PF$factors[factor,'ExpVar'])
    } else {
      output$model.summary <- data.frame('method'=PF$method,
                                         'Test_Statistic'=PF$objective[factor],
                                         'Pval'=PF$pvals[factor])
    }

    ### data ###
    if (PF$phylofactor.fcn %in% c('PhyloFactor','PhyCA')){
      y <- PF$contrast.fcn(PF$groups[[factor]],PF$transform.fcn(PF$Data))
      if (is.null(PF$X)){
        output$data <- data.frame('Data'=y)
      } else {
        if (!all(class(PF$X)=='data.frame')){
          output$data <- data.frame('Data'=y,'X'=PF$X)
        } else {
          output$data <- cbind(data.frame('Data'=y),PF$X)
        }
      }
      if (!is.null(PF$models)){
        output$data$fitted.values <- PF$models[[factor]]$fitted.values
      }
    } else if (PF$phylofactor.fcn == 'gpf'){
      if (PF$algorithm!='CoefContrast'){
        output$data <- PF$models[[factor]]$data
        output$data$fitted.values <- NA
        output$data$fitted.values[setdiff(1:nrow(output$data),PF$models[[factor]]$na.action)] <- PF$models[[factor]]$fitted.values
      } else {
        output$data <- list('Coefficient_matrix'=PF$coefficient.matrix,
                            'Coefficient_SE'=PF$coefficient.SE,
                            'Contrast_Vector'=PF$basis[,factor,drop=F],
                            'Partitioning_Variables'=PF$PartitioningVariables)
      }
    } else {
      output$data <- list('Group1'=PF$Data[PF$groups[[factor]][[1]]],
                          'Group2'=PF$Data[PF$groups[[factor]][[2]]])
    }

    ### species.list
    output$species.list <- pf.groupsTospecies(PF)[[factor]]

    ### taxa.split
    if (!is.null(taxonomy)){
      output$taxa.split <- pf.taxa(PF,taxonomy,factor)
    } else {
      output$taxa.split <- NULL
    }


    ### taxon.table ##################################################
    if (taxon.trimming=='species'){
      spp1 <- output$species.list[[1]]
      spp2 <- output$species.list[[2]]
      group1.taxa <- spp1
      group2.taxa <- spp2
      species.assignment1 <- spp1
      species.assignment2 <- spp2

    } else {
      group1.taxonomy <- taxonomy[match(output$species.list[[1]],as.character(taxonomy[,1])),]
      group2.taxonomy <- taxonomy[match(output$species.list[[2]],as.character(taxonomy[,1])),]
      if (taxon.trimming=='sup'){
        group1.taxa <- output$taxa.split[[1]]
        group2.taxa <- output$taxa.split[[2]]
      } else {
        group1.taxa <- unique(group1.taxonomy[,2])
        group2.taxa <- unique(group2.taxonomy[,2])
      }
      group1.taxa <- group1.taxa[order(nchar(group1.taxa),decreasing = T)]
      group2.taxa <- group2.taxa[order(nchar(group2.taxa),decreasing = T)]

      species.assignment1 <- vector(mode='list',length=length(group1.taxa))
      names(species.assignment1) <- group1.taxa
      species.assignment2 <- vector(mode='list',length=length(group2.taxa))
      names(species.assignment2) <- group2.taxa

      if (length(group1.taxa)==1){
        tx <- 1
      } else {
        tx <- sapply(group1.taxa,FUN=function(tax,taxa) grepl(tax,taxa),group1.taxonomy[,2]) %>%
          apply(MARGIN=1,FUN=function(g) min(which(g)))
      }
      for (i in 1:length(group1.taxa)){
        species.assignment1[[i]] <- output$species.list[[1]][tx==i]
      }

      if (length(group2.taxa)==1){
        tx <- 1
      } else {
        tx <- sapply(group2.taxa,FUN=function(tax,taxa) grepl(tax,taxa),group2.taxonomy[,2]) %>%
                    apply(MARGIN=1,FUN=function(g) min(which(g)))
      }
      for (i in 1:length(group2.taxa)){
        species.assignment2[[i]] <- output$species.list[[2]][tx==i]
      }

      output$taxon.species.assignments <- list('group1'=species.assignment1,
                                               'group2'=species.assignment2)
    }


    Tbl1 <- data.frame('Taxon'=group1.taxa,'nSpecies'=sapply(species.assignment1,length))
    Tbl2 <- data.frame('Taxon'=group2.taxa,'nSpecies'=sapply(species.assignment2,length))


    if (PF$phylofactor.fcn %in% c('PhyloFactor','PhyCA')){
      transform.means1 <- sapply(species.assignment1,FUN=function(ix,PF) mean(PF$transform.fcn(PF$Data[ix,,drop=F]),na.rm=T),PF)
      transform.vars1 <- sapply(species.assignment1,FUN=function(ix,PF) mean(apply(PF$transform.fcn(PF$Data[ix,,drop=F]),1,var,na.rm=T),na.rm=T),PF)
      transform.means2 <- sapply(species.assignment2,FUN=function(ix,PF) mean(PF$transform.fcn(PF$Data[ix,,drop=F]),na.rm=T),PF)
      transform.vars2 <- sapply(species.assignment2,FUN=function(ix,PF) mean(apply(PF$transform.fcn(PF$Data[ix,,drop=F]),1,var,na.rm=T),na.rm=T),PF)
      raw.means1 <- sapply(species.assignment1,FUN=function(ix,PF) mean(PF$Data[ix,,drop=F],na.rm=T),PF)
      raw.vars1 <- sapply(species.assignment1,FUN=function(ix,PF) mean(apply(PF$Data[ix,,drop=F],1,var,na.rm=T),na.rm=T),PF)
      raw.means2 <- sapply(species.assignment2,FUN=function(ix,PF) mean(PF$Data[ix,,drop=F],na.rm=T),PF)
      raw.vars2 <- sapply(species.assignment2,FUN=function(ix,PF) mean(apply(PF$Data[ix,,drop=F],1,var,na.rm=T),na.rm=T),PF)
      raw.sum1 <- mapply(raw.means1,FUN = function(a,b) a*b,b=sapply(species.assignment1,length))
      raw.sum2 <- mapply(raw.means2,FUN = function(a,b) a*b,b=sapply(species.assignment2,length))

      Stats1 <- data.frame('raw.sum'=raw.sum1,'raw.mean'=raw.means1,'raw.var'=raw.vars1,
                           'transformed.mean'=transform.means1,'transformed.var'=transform.vars1,row.names = NULL)
      Stats2 <- data.frame('raw.sum'=raw.sum2,'raw.mean'=raw.means2,'raw.var'=raw.vars2,
                         'transformed.mean'=transform.means2,'transformed.var'=transform.vars2,row.names=NULL)

      Tbl1 <- cbind(Tbl1,Stats1)
      Tbl2 <- cbind(Tbl2,Stats2)
      Tbl1 <- Tbl1[order(Tbl1$raw.sum,decreasing = T),]
      Tbl2 <- Tbl2[order(Tbl2$raw.sum,decreasing = T),]

    }

    if (output.signal){
      if (PF$phylofactor.fcn=='gpf'){
        if (!PF$algorithm=='mstable'){
          if (!key(PF$Data)=='Species'){
            setkey(PF$Data,Species)
          }
        }
      }
      Grps1 <- lapply(species.assignment1,FUN=function(spp,tree) match(spp,tree$tip.label),PF$tree)
      Grps2 <- lapply(species.assignment2,FUN=function(spp,tree) match(spp,tree$tip.label),PF$tree)
      Grps1 <- lapply(Grps1,FUN=function(g1,g2) list(g1,g2),g2=unlist(PF$groups[[factor]][2]))
      Grps2 <- lapply(Grps2,FUN=function(g1,g2) list(g2,g1),g2=unlist(PF$groups[[factor]][1]))

      Tbl1$signal <- sapply(Grps1,getSignal,PF)
      Tbl2$signal <- sapply(Grps2,getSignal,PF)
      Tbl1 <- Tbl1[order(Tbl1$signal,decreasing = T),]
      Tbl2 <- Tbl2[order(Tbl2$signal,decreasing = T),]
    }
    rownames(Tbl1) <- NULL
    rownames(Tbl2) <- NULL
    output$signal.table <- list('Group1'=Tbl1,'Group2'=Tbl2)

    if (PF$phylofactor.fcn %in% c('PhyloFactor','gpf')){
      if (!is.null(PF$models)){
        formula <- Reduce(paste,deparse(PF$models[[1]]$formula))
      } else {
        if (PF$phylofactor.fcn=='PhyloFactor'){
          if (PF$choice=='custom'){
            
            if (is.null(tryCatch(PF$custom.output[[1]]$formula,error=function(e) NULL))){
              formula <- 'Customized'
            } else {
              formula <- paste('
                               Formula                   : ',Reduce(paste,deparse(PF$custom.output[[1]]$formula)),sep='')
            }
          } else if (PF$method=='max.var'){
              formula <- NULL
          }
        } else {
            if (PF$algorithm=='CoefContrast'){
              formula <- Reduce(paste,deparse(PF$species.models[[1]]$formula))
            }
        }
      }
    } else {
      formula <- NULL
    }
    output$info <- list('phylofactor.fcn'=PF$phylofactor.fcn,
                        'method'=PF$method,'choice'=PF$choice,
                        'algorithm'=PF$algorithm,'formula'=formula,
                        'factor'=factor,'nEdges'=length(Grps))
    class(output) <- 'phylofactor.summary'
    return(output)
  }
}
reptalex/phylofactor documentation built on Feb. 28, 2024, 3:19 p.m.