R/preprocess_cv.R
In BHPMF: Uncertainty Quantified Matrix Completion using Bayesian Hierarchical Matrix Factorization

PreprocessCv <- function(X, hierarchy.info, num.folds, tmp.dir, verbose) {
  #Missing Values should be stored as NA
  
  # source("countNumTraits.R")
  # source("generatePermutation.R")
  # source("splitData.R")
  # source("findFold.R")
  # source("buildUpperLevelMat.R")
  
  # library(Matrix)
  
  if (!sum(is.na(X))) {
    stop("No missing value in X! Missing Values should be stored as NA.")
  }
  
  if (num.folds < 2) {
    stop("num.folds should be greater than 2.")
  }
  
  if (!is.matrix(X)) {
    stop("X should be a matrix. You may use as.matrix to convert X to a matrix.")
  }
  
  nrows = nrow(X);
  ncols = ncol(X);
  
  if (nrow(hierarchy.info) != nrows) {
    stop("Number of rows of hierarchy.info and X should be the same!")
  }
  
  num.hierarchy.levels <- ncol(hierarchy.info)
  
  column_names <- colnames(X)
  
  # check if all columns has at least one observation. 
  for (ind in 1: ncols) {
    if (!sum(!is.na(X[, ind]))) {
      stop("there is no available observation for column", ind, "!\n")
    }
  }
  
  # check if the hierarchy.info contains all information for all observations
  for (ind in 1: num.hierarchy.levels) {
    if (sum(!is.na(hierarchy.info[, ind])) < nrows) {
      stop("there are observation with no hierarchy info at level", ind, "!\n")
    }
  }
  
  # print("Count Number of Traits per Observation ...")
  # noTraits: a vector of size observation containing number of available trait per observation
  # indObs:  a vector containing index of observation with at least one available trait
  # traitIds: containing index of available trait per observation
  res <- countNumTraits(X, 1);
  noTraits <- res[[1]];
  indObs <- res[[2]];
  traitIds <- res[[3]];
  fileName  <- paste(tmp.dir, "/freqObsNoTraits_pmf.Rda", sep="");
  save(noTraits, indObs ,traitIds, file=fileName)
  
  if (length(indObs) < nrows) {
    stop("There are observations with all features missing! \n 
         Observations with all features missing should be removed!")
  }
  
  # find non-zero elements
  Idx <- which(!is.na(X), arr.ind=TRUE);
  val <- X[Idx];
  
  # Y: 3-column matrix: sparse representation of trait info
  # each row is a triple (i, j, value)
  # i: observation index (row index)
  # j: feature index (column index)
  # vlaue: the observed value
  Y <- cbind(Idx, val);
  rm(Idx, val);
  
  file_name <-paste(tmp.dir, "/ProcessedData_pmf.Rda", sep="")
  save (X, Y, column_names, file = file_name)
  rm(file_name, res)
  
  # %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
  # %%Split Dataset to training, test, validation set
  # %% Similar to CV (10 fold) some data may appear more than once in test set
  # %% (not completly disjoint)
  # %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
  # call generatePermutation
  ptm <- proc.time()
  # permute_file <-paste(output_dir, "/permutationMat.Rda", sep="");
  file_name <-paste(tmp.dir, "/traits_trees_atleastone_", sep="");
  tree_file <-paste(tmp.dir, "/ProcessedData_pmf.Rda", sep="");
  # split the dataset for cross validation
  if(verbose) {
    cat("Splitting the data ...")
  }
  # find_folds(X, num.folds, file_name, tree_file, output_dir, 0, trait_names, 1, num.hierarchy.levels)
  find_folds(X, num.hierarchy.levels, num.folds, 0, tmp.dir)
  
  if (num.hierarchy.levels > 1) {
    if(verbose) {
      cat("preprocess the hierachy: \n")
    }
    # source("findPhyloInfo.R")
    PreprocessHierarchy(X, hierarchy.info, tmp.dir)
    if(verbose) {
      cat("\n Building upper level matrices...")
    }
    # build upper level matrices for each fold
    buildUpperLevelMat(tmp.dir, 0, column_names, num.hierarchy.levels, num.folds, verbose)
    
    if(verbose) {
      cat("preprocessing time: ", proc.time() - ptm, "\n")
    }
  } else {
    num.nodes.per.level <- rep(0, 1)
    num.nodes.per.level[1] <- nrow(X)
    file_name <-paste(tmp.dir, "/processed_hierarchy_info.Rda", sep="")
    save(num.nodes.per.level, file=file_name)
  }
  
  }

Any scripts or data that you put into this service are public.

BHPMF documentation built on June 20, 2017, 9:10 a.m.

rdrr.io home R language documentation Run R code online

CRAN packages Bioconductor packages R-Forge packages GitHub packages

Note that we can't provide technical support on individual packages. You should contact the package authors for that.

BHPMF
Uncertainty Quantified Matrix Completion using Bayesian Hierarchical Matrix Factorization

R/preprocess_cv.R
In BHPMF: Uncertainty Quantified Matrix Completion using Bayesian Hierarchical Matrix Factorization

Defines functions PreprocessCv

Try the BHPMF package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

BHPMF Uncertainty Quantified Matrix Completion using Bayesian Hierarchical Matrix Factorization

R/preprocess_cv.R In BHPMF: Uncertainty Quantified Matrix Completion using Bayesian Hierarchical Matrix Factorization

Defines functions PreprocessCv

Try the BHPMF package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

BHPMF
Uncertainty Quantified Matrix Completion using Bayesian Hierarchical Matrix Factorization

R/preprocess_cv.R
In BHPMF: Uncertainty Quantified Matrix Completion using Bayesian Hierarchical Matrix Factorization