R/iPVs.R

Defines functions iPVs

Documented in iPVs

#' A super function that runs through all of the necessary staeps for you to identify Independent Principal Variables
#'
#' A super function that runs through all of the necessary staeps for you to identify Independent Principal Variables
#' @param variabledata a data.frame of the variable data used to build your cormat and hclust tree
#' @param cor_method the correlation method used to construct the correlation matrix
#' @param dist_method the distance method to generate the distance matrix
#' @param hclust_meth the clustering method used by hclust
#' @param cutheight tree cut height. Value is equal to a dissimilarity of 1 - Spearman's rho.
#' @param cmat a correlation matrix you can pass to the function, which will skip the generation of a new matrix.
#' @keywords data reduction, independent variables, principal variables, tree cut
#' @export
#' @examples
#' iPVs()
iPVs = function( variabledata, cor_method = "spearman", dist_method = "R", hclust_meth = "complete", cutheight = 0.5, cmat = NULL ){

  
  if(!is.null(cmat)){
    match = sum( rownames(cmat) == colnames(variabledata) ) / nrow(cmat)
    if(match != 1){
      stop("Please make sure the row and column names of your provided correlation matrix match the column names of your provided data.frame")
    }
  }
  ## estimate correlation matrix, build tree, generate PCA from correlation matrix
  cat(paste0("(I) tree.builder -- \n"))
  wdata = tree.builder(variabledata, cor_method = cor_method, dist_method = dist_method, hclust_meth = hclust_meth , cmat = cmat)
  
  PVlistout = lapply(cutheight, function(CH){    
      ## identify the PVs (independent principal variables)
      cat(paste0("\n -- processing cut height ", CH , " \n\n"))

      cat(paste0("(II) ind.pvs -- identify independent clusters and initial principal variables\n"))
      StudyPVs = ind.pvs( variabledata = wdata$variabledata,
      tree = wdata$tree,
      cormat = wdata$cormat,
      distmat = wdata$distmat,
      cutheight = CH, 
      hclust_meth = hclust_meth )

      ## the last tree cut iteration defines our 
      ## data sets PVs
      ## the PVs identified for your data set
      mypvs = as.character( StudyPVs$PVs$pvs[,"PV"] )

      ## identify all variables that were clustered with your PV in any one of the tree cut iteractions
      ##  i.e. your PVs are tagging which other variables in your data set?
      cat(paste0("(III) Kcluster.groups -- identify all variables|members of a cluster, iteratively.\n"))
      PV_cluster_members = Kcluster.groups( ind_pv_iterations = StudyPVs$treecut_iterations )
      
      ## Perform a  PVA for the last time for for each of your iterative-super-clusters
      ##  and the VarExp by that top PV for the total variation of group members.
      cat(paste0("(IV) Kcluster_PVs -- identify the final set of PV for each cluster, and estimate the variance explained.\n"))
      Final_PVA_results = Kcluster_PVs( cmat = wdata$cormat, Kmembers = PV_cluster_members, myPVs = mypvs )
      
      ##############################
      ## place all of the useful data
      ## in a final table
      ##############################
      #cat(paste0("(V) Generate summary table with PVs, cluster members, and variance explained.\n"))
      #clustersize = unlist( lapply(PV_cluster_members, length) )
      #groupmembers = unlist( lapply(PV_cluster_members, function(x){ paste(x, collapse = ":") } ) )
      
      cat(paste0("(V) Generate summary table with PVs, and variance explained.\n"))
      clustersize = unlist( lapply(Final_PVA_results$PVresults, nrow) )
      
      
      iPV_table = data.frame(PVs = Final_PVA_results$PVtable$variable , 
        clustersize = clustersize,
        VarExp_by_PV = Final_PVA_results$PVtable$VarExp_individually,
        ind_VarExp_rank = Final_PVA_results$PVtable$ind_VarExp_rank,
        PVArank = Final_PVA_results$PVtable$PVArank )
        #groupmembers = groupmembers )

      ### data out
      out = list(iPV_table = iPV_table, 
        PV_cluster_members = PV_cluster_members,
        PVresults = Final_PVA_results$PVresults,
        workingdata = wdata )

      return(out)
    })
  ## add names to each list
  names(PVlistout) = paste0("cutheight_", cutheight)

  ### reduce if there is only 1 cut height
  if(length(PVlistout) == 1){
    PVlistout = PVlistout[[1]]
  }

  ##
  return(PVlistout)
}
hughesevoanth/iPVs documentation built on July 26, 2023, 3:21 p.m.