# R/search.R
#
# Defines functions search
#
# Documented in search

#'The 'search' step of the ExplodeLayout(EL) algorithm.
#'
#'This function takes as input the network nodes and edges, node coordinates, and cluster information,
#'and outputs a range of networks where the clusters are separated to different degrees.
#'
#'Input:
#'
#'(1) A 'nodelist' data object which contains labels, node coordinates, an outcome (like case or control),
#'entity (used for bipartite networks to identify the two sets of nodes), and cluster membership for
#'each node in the network. The 'nodelist' must be a dataframe with 8 columns with the following labels:
#''Label', 'FRX', 'FRY', 'KKX', 'KKY', 'Outcome', 'Cluster', 'Entity'. (FR and KK refer to coordinates
#'generated from running the Fruchterman Reingold, and Kamada Kawai layout algorithms on the inputted
#'network. If this information is not available, please apply the 'dataConvert' function in this package
#'to transfer your data and generate initial coordinates).
#'
#'(2) A 'network' data object containing the network data. The 'network' must be a dataframe or matrix.
#'When the data is a bipartite network, the matrix should be a biadjacency matrix, where the column names
#' are variable names, and row names are samples such as subjects. When the network data is unipartite
#' (only one set of nodes with edges connecting any pair), the matrix should be an adjacency matrix, and
#' the column and row labels are the same. The first column of the network file should be the outcome
#' of the nodes (1 representing sick patients, and 2 representing healthy patients). Please note: the
#' node names in 'network' must match the ones in 'nodelist'. As the two data sets are not trivial to
#' make manually, we strongly recommend using our 'dataConvert' function.
#'
#'Output:
#'
#'The 'search' function will return a list containing two dataframes. (1) 'coordinates' contains the
#'coordinates of all nodes in networks with different 'radius' ranging from 0.1 to 2.0 ('radius' is
#'a parameter in EL algorithm). (2) 'edgelist' is the edgelist of the input network. 'coordinates' and
#''edgelist' are inputs for the visualization step. By default, 'search' saves the 'coordinates' and
#''edgelist' dataframes into two files in user's current working folder. However, these can also be
#'kept in memory or output as .Rdata files. The 'search' function also outputs a 'statList.dat' file,
#'which is useful for determining which graph(s) to visualize, based on their optimal cluster separation
#'score (CSS) which represents the ratio of the overlap among the bounding boxes of the clusters, to the
#'overall bounding box.
#'
#'@param nodelistFile  A nodelist object using a dataframe where the first column is the name of the
#'nodes in the network.
#'@param networkFile A network object using a matrix or dataframe, where first column is the outcome.
#'@param projName A string denoting the project or data name specified by user. This string will be
#'used to name the exported files. Default = 'defaultName'
#'@param coordsProvided A Boolean variable, which specifies whether the nodelistFile contains node
#'coordinates generated by the user through a layout algorithm such as from the Kamada-Kawai or Fruchterman
#'Reingold layout algorithms. Default = TRUE. (If the nodelist is generated using 'dataConvert' function,
#'it will contain two sets of original coordinates generated from the Fruchterman-Reingold and Kamada-Kawai
#'layout algorithms.)
#'@param initLayout A string with values 'fr' or 'kk' to select the Fruchterman-Reingold or the Kamada-Kawai
#'layout methods respectively. Default = 'fr'.
#'@param radialOrEquiDist A string with values 'equidist' (cluster centroids are equidistant on the explode circle)
#' or 'radial' (cluster centroids are moved out radially using the center of the network as the origin, and
#' preserving the angle between centroids in the original layout). Default = 'equidist'
#'@param shiftOrRotate A string with values 'shift' (clusters are moved without rotation and therefore do not
#'correct for change in orientation after exploding), or 'rotate' (clusters are moved with rotation to correct for
#'change in the orientation after exploding). Default = 'rotate'.
#'@param selectAlgoForCentroids A string with values 'mean', 'median' or 'minmax' denoting how the centroids are
#'generated. Default='median'.
#'@param rings An integer denoting the number of imaginary circles used by EL to layout the clusters. Default = 1.
#'@param output A boolean variable specifying whether to save the output of the function into local files. Default = TRUE.
#'
#'@references \url{https://www.ncbi.nlm.nih.gov/pmc/articles/PMC5543384/}
#'
#'@export
search <- function(nodelistFile, networkFile, projName = 'defaultName', coordsProvided = TRUE,
                   initLayout = 'fr', radialOrEquiDist = 'equidist', shiftOrRotate = 'rotate',
                   selectAlgoForCentroids = 'median', rings = 1, output = TRUE) {
  # Runs the 'search' step of ExplodeLayout: converts the inputs, computes the
  # original layout coordinates, lets the helpers pick radii, and returns
  # list(coordinates = <exploded coordinates>, edgelist = <edge list>).
  # When `output` is TRUE (the default) the two data frames are also written to
  # '<projName>_edgelist.dat' and '<projName>_coordinates.dat' in the working
  # directory.

  # `output` used to be ignored, so files were written unconditionally. Coerce
  # leniently so values such as 1/0 keep working; anything that is not a single
  # TRUE-like value disables the file export.
  save_files <- isTRUE(as.logical(output))

  # Settings bundle handed to the ExplodeLayout helpers.
  input <- list(
    coordfileCheckBox = coordsProvided,              # TRUE: coordinates are supplied in the nodelist
    layoutAlgoType = initLayout,                     # layout algorithm used if no coordinates are read in
    radialOrEquiDist = radialOrEquiDist,             # placement of cluster centroids on the explode circle
    rings = rings,                                   # number of circles for explodeLayout
    selectAlgoForCentroids = selectAlgoForCentroids, # how the centroid of each cluster is chosen
    shiftOrRotate = shiftOrRotate,                   # shift or rotate the clusters
    viewTypes = '2',                                 # '2' is the standard ExplodeLayout view
    displace = 0.4                                   # initial displacement; meaning not documented in this file
  )

  # State shared between the helper calls below.
  rvalues <- list()

  nodelist <- convertNodeListFile(nodelistFile, input)
  rvalues$netType <- length(unique(nodelist$entity$Entity))
  if (rvalues$netType > 2) {
    stop("Data not supported. Make sure your input has only one or two entities.")
  }

  # Network data derived from `networkFile`; the fields used below are
  # $df, $g, $nclust and $edgelist.
  rvalues$dataFromFile <- getDataFromFile(networkFile, nodelist$modules, rvalues)
  rvalues$entity <- nodelist$entity
  rvalues$colnames <- nodelist$coords[, 1]
  rvalues$nrows <- nrow(rvalues$dataFromFile$df)
  rvalues$outcome <- nodelist$outcomes
  rvalues$coord <- getOriginalNetworkCoordinates(input, rvalues$dataFromFile$g, nodelist$coords)

  res <- findClosestRadius(input, rvalues$dataFromFile, rvalues$coord)
  rvalues$originalScore <- res$originalScore
  rvalues$ClosestRadius <- res$radius

  # The first element of the result is stored as the displacement that the
  # helpers below receive through `rvalues`.
  GOEresult <- findRadiusWithMaxGOE(input, rvalues$dataFromFile, rvalues$coord)
  rvalues$displace <- GOEresult[1]

  # Coordinates for networks exploded at different radii.
  # NOTE(review): per the original author's comment this helper also saves
  # coordinates.dat and statList.dat itself; that is not controlled by `output`
  # from here -- confirm against the helper.
  returnExplodeNetwork <- storeCoordinatesForExplodeLayout(rvalues$dataFromFile, input, rvalues$coord, rvalues)

  # NOTE(review): the values computed in this section do not reach the return
  # value or the exported files. The calls are kept in case the helpers have
  # side effects; confirm and remove if they do not.
  explode.net <- getCoordinatesForExplodeLayout(rvalues$dataFromFile, input, rvalues$coord, rvalues)
  if (rvalues$dataFromFile$nclust != 1) {
    explodeGoodness <- calculateScore(input, explode.net, rvalues$dataFromFile$nclust)
  } else {
    explodeGoodness <- "NA"
  }
  explode.net <- formatExplodeNetMatrix(explode.net, rvalues, input)

  edgelist <- rvalues$dataFromFile$edgelist
  colnames(edgelist) <- c('V1', 'V2', 'Weight')

  if (save_files) {
    write.table(edgelist, paste(projName, 'edgelist.dat', sep = '_'),
                sep = "\t", quote = FALSE, row.names = FALSE)
    write.table(returnExplodeNetwork, paste(projName, 'coordinates.dat', sep = '_'),
                sep = "\t", quote = FALSE, row.names = FALSE)
  }

  list(coordinates = returnExplodeNetwork, edgelist = edgelist)
}
# UTMB-DIVA-Lab/epl documentation built on July 28, 2019, 5:53 a.m.