FSPmix: Exploratory tool for fast unsupervised features selection and clustering using mixture models

Documented in SimulationStudy1

#' Simulation Study 1 - simulation of 20 genes
#' Generate gene expression data with two latent groups (A and B)
#' TODO Intention is to modify this for a more realistic real-life scenario
#' input
#' sEED: random seed generator
#' no.ppl: Even interger - number of observations (or participants) to simulate
SimulationStudy1<- function(sEEd = 123, no.ppl = 1000){

  library(ggplot2)
  library(reshape2)
  library(dplyr)
  library(mixtools)

  set.seed(sEEd)
  #set.seed(789)  # NOTE: different seeds lead to different results

  no.genes = 20
  dat.2<- data.frame(V1 = rep(0, no.ppl))

  ## **********************************************************
  ## simulate 10 RIGHT skewed data
  no.gene.rs = 10

  from = -1.5; to= -1  # <-- group A
  mean1 <- seq(from = from, to=to, by = ((to - from)/(no.gene.rs - 1)))
  var1<- runif(no.gene.rs, min = 0.5, max =0.7)


  from = 3; to= 2  # <--- group B
  mean2<- seq(from = from, to=to, by=((to - from)/(no.gene.rs - 1)))
  from=5; to=10
  var2<- seq(from = from, to= to, by = ((to - from)/(no.gene.rs - 1)))

  dat.2<- data.frame(V1 = rep(NA, no.ppl))
  for(i in 1:no.gene.rs){
    dat.2[1:(no.ppl/2), i]<- rnorm(no.ppl/2, mean = mean1[i], sd = sqrt(var1[i]) )
    dat.2[(no.ppl/2 +1):no.ppl, i]<- rnorm(no.ppl/2, mean = mean2[i], sd = sqrt(var2[i]) )
  }

  dat.2r = mutate(dat.2, ppl = 1:no.ppl,
                  group = factor(c(rep("A", no.ppl/2), rep("B", no.ppl/2)), levels = c("A", "B")))

  gene.list<- paste("gene.", 1:no.gene.rs, sep = "")
  colnames(dat.2r)[1:no.gene.rs]<- gene.list

  dat.right.skewed<- dat.2r
  dat.2ar<- melt(dat.2r, id.vars = c("ppl", "group"))



  ## ******************************************************************
  ## simulate 10 LEFT skewed data
  no.gene.ls = 10

  from = 1.5; to= 1  # <-- group
  mean1<- seq(from = from, to=to, by=((to - from)/(no.gene.rs - 1)))
  var1<- runif(no.gene.rs, min = 0.5, max =0.7)

  from = -3; to= -2  # <-- group
  mean2 <- seq(from = from, to=to, by = ((to - from)/(no.gene.rs - 1)))
  from=5; to=10
  var2<- seq(from = from, to= to, by = ((to - from)/(no.gene.rs - 1)))


  dat.2<- data.frame(V1 = rep(NA, no.ppl))
  for(i in 1:no.gene.ls){
    #dat.2[1:(no.ppl/2), i]<- rnorm(no.ppl/2, mean = mean1[i], sd = sqrt(var1[i]) )
    #dat.2[(no.ppl/2 +1):no.ppl, i]<- rnorm(no.ppl/2, mean = mean2[i], sd = sqrt(var2[i]) )
    dat.2[(no.ppl/2 +1):no.ppl, i]<- rnorm(no.ppl/2, mean = mean1[i], sd = sqrt(var1[i]) )
    dat.2[1:(no.ppl/2), i]<- rnorm(no.ppl/2, mean = mean2[i], sd = sqrt(var2[i]) )
  }

  dat.2l = mutate(dat.2, ppl = 1:no.ppl,
                  group = factor(c(rep("A", no.ppl/2), rep("B", no.ppl/2)), levels = c("A", "B")))

  gene.list<- paste("gene.",(no.gene.ls+1):(2*no.gene.ls), sep = "")
  colnames(dat.2l)[1:no.gene.ls]<- gene.list

  dat.left.skewed<- dat.2l
  dat.2al<- melt(dat.2l, id.vars = c("ppl", "group"))


  ###
  ###
  ### Combine both data sets

  # check
  #all.equal(dat.right.skewed$group, dat.left.skewed$group)
  #all.equal(dat.right.skewed$ppl, dat.left.skewed$ppl)

  dat<- cbind(dat.right.skewed[, 1:10], dat.left.skewed)
  dat.2all<- melt(dat, id.vars = c("ppl", "group"))

  colnames(dat)[1:20]<- paste("Feature.", 1:20, sep = "")
  dat.2all<- melt(dat, id.vars = c("ppl", "group"))
  # check allocation of groups A and B
  p1<- ggplot(dat.2all, aes(x = value, fill = group)) + geom_density(alpha = 0.2) +
    facet_wrap(~variable, scales="free", ncol = 5) +
    theme_bw() + ggtitle("Simulated synthetic feature data: colour coded into groups") + xlab("Simulated normalised feature") +
    theme(legend.position="bottom") +
    theme(strip.text.x = element_text(size = 10))

  #x11()
  #p1

  p2<- ggplot(dat.2all, aes(x = value)) + geom_density(alpha = 0.2) +
    facet_wrap(~variable, scales="free", ncol = 10) +
    theme_bw() + ggtitle("Simulated left & right skewed data: what FSPmix sees") + xlab("Simulated normalised Gene expression")

  #x11()
  #p2

  #################################
  op<-list(dat = dat,
           plot.ColourCoded.genes = p1,
           plot.WhatFSPmixSees = p2)

  #################################
  return(op)
}

MarcelaCespedes/FSPmix documentation built on May 12, 2020, 5:49 p.m.

rdrr.io home R language documentation Run R code online

CRAN packages Bioconductor packages R-Forge packages GitHub packages

Note that we can't provide technical support on individual packages. You should contact the package authors for that.

MarcelaCespedes/FSPmix
Exploratory tool for fast unsupervised features selection and clustering using mixture models

R/SimulationStudy1.r
In MarcelaCespedes/FSPmix: Exploratory tool for fast unsupervised features selection and clustering using mixture models

Defines functions SimulationStudy1

Documented in SimulationStudy1

R Package Documentation

Browse R Packages

We want your feedback!

MarcelaCespedes/FSPmix Exploratory tool for fast unsupervised features selection and clustering using mixture models

R/SimulationStudy1.r In MarcelaCespedes/FSPmix: Exploratory tool for fast unsupervised features selection and clustering using mixture models

Defines functions SimulationStudy1

Documented in SimulationStudy1

R Package Documentation

Browse R Packages

We want your feedback!

MarcelaCespedes/FSPmix
Exploratory tool for fast unsupervised features selection and clustering using mixture models

R/SimulationStudy1.r
In MarcelaCespedes/FSPmix: Exploratory tool for fast unsupervised features selection and clustering using mixture models