# R/dummy-datasets-creator.R

#' Simulate a methylation-like matrix for testing purposes
#'
#' Builds a numeric matrix with 5 genes (rows) and 200 patients (columns)
#' made of two groups of genes drawn from uniform distributions:
#'   genes 1-3: the first 150 patients come from runif(n, 0, 0.2) and the
#'     last 50 patients from runif(n, 0.7, 0.99)
#'   genes 4-5: the first 75 patients come from runif(n, 0.6, 1) and the
#'     last 125 patients from runif(n, 0.2, 0.4)
#' Rows are named gene_1 ... gene_5 and columns p_1 ... p_200.
#' The function calls set.seed(seed), so the state of the global random
#' number generator is reset as a side effect.
#'
#' @param seed the seed number for reproducibility of random numbers
#'
#' @rdname dummy_datasets
#' @return dataset
#'
#' @importFrom stats runif
#' @export
#'
dummy_methylation_like_dataset <- function(seed = 1234) {
  # Number of genes and the patient split (left / right columns) per group.
  genes_a <- 3
  genes_b <- 2
  split_a <- c(150, 50)
  split_b <- c(75, 125)

  # Values fill each block column by column (the matrix() default).
  as_block <- function(values, n_genes, n_patients) {
    matrix(values, nrow = n_genes, ncol = n_patients)
  }

  set.seed(seed)
  # The order of the four draws is part of the reproducible output.
  a_left <- runif(split_a[1] * genes_a, 0, 0.2)
  a_right <- runif(split_a[2] * genes_a, 0.7, 0.99)
  b_left <- runif(split_b[1] * genes_b, 0.6, 1)
  b_right <- runif(split_b[2] * genes_b, 0.2, 0.4)

  upper <- cbind(as_block(a_left, genes_a, split_a[1]),
                 as_block(a_right, genes_a, split_a[2]))
  lower <- cbind(as_block(b_left, genes_b, split_b[1]),
                 as_block(b_right, genes_b, split_b[2]))

  simulated <- rbind(upper, lower)
  dimnames(simulated) <- list(paste0("gene_", seq_len(nrow(simulated))),
                              paste0("p_", seq_len(ncol(simulated))))
  simulated
}

#' Build a flat methylation-like matrix for testing purposes
#'
#' Creates a dataset with 200 patients (columns) and 1 gene (row):
#'   the first 191 values are 0.3 and the last 9 values are 0.4
#' The row is named gene_1 and the columns p_1 ... p_200.
#' The content is fixed: no random numbers are drawn and the seed argument
#' is never read by the function body.
#'
#' @inheritParams dummy_methylation_like_dataset
#'
#' @rdname dummy_datasets
#'
#' @export
#'
dummy_methylation_like_flat_dataset <- function(seed = 1234) {
  constant_values <- rep(c(0.3, 0.4), times = c(191, 9))
  flat <- matrix(constant_values, nrow = 1, ncol = 200)

  dimnames(flat) <- list(paste0("gene_", seq_len(nrow(flat))),
                         paste0("p_", seq_len(ncol(flat))))
  flat
}


#' Create a dummy dictionary gene to methylationCluster from a dummy dataset
#'
#' Every row name of the dataset becomes one entry of the returned list;
#' the entry is named after the row name and its value is that same row
#' name.
#'
#' @param dataset a dummy dataset for the dummy dict; its row names are used
#'
#' @rdname dummy_datasets
#'
#' @return dict
#' @export
#'
create_met_cluster_dict <- function(dataset) {
  gene_ids <- row.names(dataset)
  dict <- as.list(gene_ids)
  names(dict) <- gene_ids
  dict
}

#' Function to create some fake expression-like dataset for testing purpose
#'
#' Create a dataset with 200 patients (columns) and 5 genes (rows):
#'   3 genes contains 150/50 ratio difference with values obtained with
#'     rnorm (n1 , 5, 1) / rnorm(n2, 8, 1)
#'   2 genes 75/125 ratio difference in the opposite direction obtained with
#'     rnorm (n3 , 9, 1) / rnorm(n4, 2, 1)
#' Rows are named gene_1 ... gene_5 and columns p_1 ... p_200.
#' The function calls set.seed(seed), so the state of the global random
#' number generator is reset as a side effect.
#'
#' @inheritParams dummy_methylation_like_dataset
#'
#' @rdname dummy_datasets
#' @importFrom stats rnorm
#'
#' @export
#'
dummy_expression_like_dataset <- function(seed = 1234) {
  genesSize <- c(3, 2)
  patientsRatio <- list(c(150, 50), c(75, 125))

  # Seed the generator so that the same seed always yields the same matrix,
  # as the sibling dummy dataset generators do.
  set.seed(seed)
  lowValues <- rnorm(patientsRatio[[1]][1] * genesSize[1], 5, 1)
  highValues <- rnorm(patientsRatio[[1]][2] * genesSize[1], 8, 1)
  lowValues2nd <- rnorm(patientsRatio[[2]][1] * genesSize[2], 9, 1)
  highValues2nd <- rnorm(patientsRatio[[2]][2] * genesSize[2], 2, 1)

  # Each block is filled column by column; blocks are glued side by side
  # (patients) and then stacked (genes).
  firstGroup <- cbind(
    matrix(lowValues, ncol = patientsRatio[[1]][1], nrow = genesSize[1]),
    matrix(highValues, ncol = patientsRatio[[1]][2], nrow = genesSize[1])
  )
  secondGroup <- cbind(
    matrix(lowValues2nd, ncol = patientsRatio[[2]][1], nrow = genesSize[2]),
    matrix(highValues2nd, ncol = patientsRatio[[2]][2], nrow = genesSize[2])
  )
  fake_exp <- rbind(firstGroup, secondGroup)

  row.names(fake_exp) <- paste0("gene_", seq_len(NROW(fake_exp)))
  colnames(fake_exp) <- paste0("p_", seq_len(NCOL(fake_exp)))
  fake_exp
}

#' Function to create some fake CNV-like dataset for testing purpose
#'
#' Create a dataset with 200 patients (columns) and 5 genes (rows):
#'   3 genes contains 150/50 ratio difference with values obtained by
#'     sampling with replacement c(-1,0,0,0,1,1) for the first 150 patients
#'     and c(-2,0,0,0,0,2,2) for the last 50 patients
#'   2 genes 75/125 ratio difference in the opposite direction obtained by
#'     sampling with replacement c(-2,0,0,0,0,0,2) for the first 75 patients
#'     and c(-1,0,0,0,0,0,1) for the last 125 patients
#' Rows are named gene_1 ... gene_5 and columns p_1 ... p_200.
#' The function calls set.seed(seed), so the state of the global random
#' number generator is reset as a side effect.
#'
#' @inheritParams dummy_methylation_like_dataset
#'
#' @rdname dummy_datasets
#'
#' @export
#'
dummy_cnv_like_dataset <- function(seed = 1234) {
  set.seed(seed)
  genesSize <- c(3, 2)
  patientsRatio <- list(c(150, 50), c(75, 125))

  # replace = TRUE is spelled out: the shorthand T is an ordinary variable
  # that can be reassigned, which would silently change the sampling mode.
  lowValues <- sample(c(-1, 0, 0, 0, 1, 1),
                      patientsRatio[[1]][1] * genesSize[1], replace = TRUE)
  highValues <- sample(c(-2, 0, 0, 0, 0, 2, 2),
                       patientsRatio[[1]][2] * genesSize[1], replace = TRUE)
  lowValues2nd <- sample(c(-2, 0, 0, 0, 0, 0, 2),
                         patientsRatio[[2]][1] * genesSize[2], replace = TRUE)
  highValues2nd <- sample(c(-1, 0, 0, 0, 0, 0, 1),
                          patientsRatio[[2]][2] * genesSize[2], replace = TRUE)

  # Each block is filled column by column; blocks are glued side by side
  # (patients) and then stacked (genes).
  firstGroup <- cbind(
    matrix(lowValues, ncol = patientsRatio[[1]][1], nrow = genesSize[1]),
    matrix(highValues, ncol = patientsRatio[[1]][2], nrow = genesSize[1])
  )
  secondGroup <- cbind(
    matrix(lowValues2nd, ncol = patientsRatio[[2]][1], nrow = genesSize[2]),
    matrix(highValues2nd, ncol = patientsRatio[[2]][2], nrow = genesSize[2])
  )
  fake_cnv <- rbind(firstGroup, secondGroup)

  row.names(fake_cnv) <- paste0("gene_", seq_len(NROW(fake_cnv)))
  colnames(fake_cnv) <- paste0("p_", seq_len(NCOL(fake_cnv)))
  fake_cnv
}
# cavei/MOSClip documentation built on May 12, 2019, 5:22 p.m.