inst/doc/example.R

## ----fig.width=11,fig.height=11,message=FALSE---------------------------------



## ----message=FALSE------------------------------------------------------------
 library(imcExperiment)
 showClass("imcExperiment")

  #10 cells with 10 proteins
  # 10 neighbors 
 # and square distance matrix
 # note that for SCE objects the columns are the cells, and rows are proteins
  x<-imcExperiment(cellIntensity=matrix(1,nrow=10,ncol=10),
	coordinates=matrix(1,nrow=10,ncol=2),
	neighborHood=matrix(1,nrow=10,ncol=10),
	network=data.frame(matrix(1,nrow=10,ncol=10)),
	distance=matrix(1,nrow=10,ncol=10),
	morphology=matrix(1,nrow=10,ncol=10),
	uniqueLabel=paste0("A",seq_len(10)),
	panel=letters[1:10],
	ROIID=data.frame(ROIID=rep("A",10)))
  

 #7 cells with 10 proteins
 # but the spatial information is 10 rows and this will fail to build.
 #x<-imcExperiment(cellIntensity=matrix(1,nrow=7,ncol=10),
#	coordinates=matrix(1,nrow=10,ncol=2),
#	neighborHood=matrix(1,nrow=10,ncol=20),
#	network=matrix(1,nrow=10,ncol=3),
#	distance=matrix(1,nrow=10,ncol=2),
#	uniqueLabel=rep("A",10),
#	ROIID=data.frame(ROIID=rep("A",10)))


 


## ----access,eval=TRUE,message=FALSE-------------------------------------------

  #get cellintensities
  cellIntensity(x)
  #set intensities
  newIn<-matrix(rnorm(100,2,5),nrow=10,ncol=10)
  rownames(newIn)<-rownames(x)
  colnames(newIn)<-colnames(x)
   cellIntensity(x)<-newIn
  cellIntensity(x)==newIn

 # if we want to store both the raw and normalized values in assays we can.
  assays(x,withDimnames = FALSE)$raw<-matrix(1,nrow=10,ncol=10)
  #store the normalized values.
   cellIntensity(x)<-asinh(counts(x)/0.5)
   all(cellIntensity(x)==asinh(assays(x)$counts/0.5))
  x



 ## access the coordinates
  getCoordinates(x)
  getCoordinates(x)<-matrix(rnorm(20,0,10),nrow=10,ncol=2)
head(  getCoordinates(x))
  
 ## access the neighborhood profile.  Note each row must equal the number of cells, but the columns can be extended depending on the radius of interactions.
  ## access the coordinates
  getNeighborhood(x)
  getNeighborhood(x)<-matrix(rnorm(100,1,5),nrow=10,ncol=10)
 head( getNeighborhood(x))

 ## get the distance usually a square matrix, or can be just first nearest etc. 
    getDistance(x)
  getDistance(x)<-matrix(rnorm(100,1,5),nrow=10,ncol=10)
  head(getDistance(x))

 # get morphological features
  getMorphology(x)
  getMorphology(x)<-matrix(rnorm(100,1,5),nrow=10,ncol=60)
  head(getMorphology(x))

 ## for each cell we can obtain the ROI that it belongs to
  rowData(x)
 ## if we want to add patient features to each ROI we can
  metas<-data.frame(ROIID=factor(rep("A",10)),treatment=factor(rep('none',10)))
  colData(x)<-DataFrame(metas)

 ##other slots for covariates
   metadata(x)$experiment<-'test'
   metadata(x)

## ----message=FALSE------------------------------------------------------------
  ### load the data from package.
  library(imcExperiment)
 ##load the data 1000 cells from IMC experiment.
  data(data)
  dim(data)
  ##output from histoCAT to R
  expr<-data[,3:36]
  normExp<-percentilenormalize(data=expr,percentile=0.99)
  normExp<-as.matrix(normExp)


  ##spatial component
  spatial<-(data[,c("X_position","Y_position")])
  spatial<-as.matrix(spatial)
 ##uniqueLabel
  uniqueLabel<-paste0(data[,"ImageId"],"_",data[,"CellId"])

 phenotypes<-data[,grepl("Phenograph",colnames(data))]
 morph<-as.matrix(data[,c("Area","Eccentricity",
	"Solidity",
	"Extent",
	"Perimeter")])

  x<-imcExperiment(cellIntensity=t(normExp),
	coordinates=spatial,
	neighborHood=as.matrix(data[,grepl("neighbour_",colnames(data))]),
	network=phenotypes,
	distance=matrix(1,nrow=nrow(data),ncol=10),
	morphology=morph,
	panel=colnames(normExp),
	uniqueLabel=paste0(data$ImageId,"_",data$CellId),
	ROIID=data.frame(ROIID=data$ImageId))

 ## explore the container.
  dim(assay(x))
   colData(x)$treatment<-DataFrame(treatment=rep('none',1000))
   head(colData(x))
 head( colnames(x))
  rownames(x)
  
  ## Intensity
  all(t(cellIntensity(x))==normExp)
  head(t(cellIntensity(x)))
  ##coordinate
  all(getCoordinates(x)==spatial)
  head(getCoordinates(x))
  #neighbor attraction data form histoCAT
 all(getNeighborhood(x)==as.matrix(data[,grepl("neighbour_",colnames(data))]))
 head(getNeighborhood((x)))
 ##phenotype cluster ID
  head(getNetwork(x))
 all(getNetwork(x)==phenotypes)
 ###distance calculations
  head(getDistance(x))
  ##morphology
   all(getMorphology(x)==morph)
  head(getMorphology(x))
  ##uniqueLabel
   head(getLabel(x))
   all(getLabel(x)==paste0(data$ImageId,"_",data$CellId) )

## -----------------------------------------------------------------------------
 

 ### inherited accessor.
  pca_data <- prcomp(t(counts(x)), rank=50)
tsDat<-data.frame(tsne.x=data$tSNE4148542692_1,tsne.y=data$tSNE4148542692_2,row.names=colnames(x))

reducedDims(x)<-list(PCA=pca_data$x,TSNE=tsDat)
x
reducedDimNames(x)
     dim(reducedDims(x)$PCA)
     dim(reducedDims(x)$TSNE)

 imc<-x
 x<-NULL

## ----pheno,eval=FALSE,fig.width=11,fig.height=11,message=FALSE----------------
#  
#   ### create  phenotypes via Rphenograph
#    ##run phenograph
#    library(Rphenograph)
#  library(igraph)
#    phenos<-Rphenograph(t(cellIntensity(imc)),k=35)
#     pheno.labels<-as.numeric(igraph::membership(phenos[[2]]))
#     getNetwork(imc)<-data.frame(cell_clustering=pheno.labels)
#     head( getNetwork(imc))
#    ##plot phenograph
#    #plot_clustering_heatmap_wrapper(myExperiment=imc)

## ----fig.width=11,fig.height=11-----------------------------------------------

### reading in a directory of histoCAT csv files.

 ##need to reconstruct the fold revised IMC data.
  ##merges a folder full of histoCAT csv files.
 # use morphology and the 1-10 neighbors.
  dataSet<-NULL
 for(i in c("case8.csv","case5.csv")){
  message(i)
    fpath <- system.file("extdata", i, package="imcExperiment")
    case1<-read.csv(fpath)
   proteins<-colnames(case1)[grepl("Cell_",colnames(case1))]
   neig<-colnames(case1)[grepl("neighbour_",colnames(case1))][1:10]
    case1<-case1[,c("ImageId","CellId","X_position","Y_position",proteins,
                    "Area",
                    "Eccentricity",
	"Solidity",
	"Extent",
	"Perimeter",
	 neig)]
    dataSet<-rbind(dataSet,case1)
  }
 
 expr<-dataSet[,proteins]
  normExp<-percentilenormalize(data=expr,percentile=0.99)
  normExp<-as.matrix(normExp)
  boxplot(normExp)

  ##spatial component
  spatial<-(dataSet[,c("X_position","Y_position")])
  spatial<-as.matrix(spatial)
 ##uniqueLabel
  uniqueLabel<-paste0(dataSet[,"ImageId"],"_",dataSet[,"CellId"])

  ##not yet assigned
  phenotypes<-matrix(0,nrow(dataSet),1)
  ROIID=data.frame(ROIID=dataSet[,"ImageId"])
  morph<-dataSet[,c("Area",
                    "Eccentricity",
	"Solidity",
	"Extent",
	"Perimeter")]
  
  x<-imcExperiment(cellIntensity=t(normExp),
	coordinates=spatial,
	neighborHood=as.matrix(dataSet[,grepl("neighbour_",colnames(dataSet))]),
	network=phenotypes,
	distance=matrix(1,nrow=nrow(dataSet),ncol=10),
	morphology=morph,
	panel=colnames(normExp),
	uniqueLabel=uniqueLabel,
	ROIID=ROIID)
   x
   
   
   #require(Rphenograph); library(igraph)
  #phenos<-Rphenograph(t(cellIntensity(x)),k=35)
  # pheno.labels<-as.numeric(membership(phenos[[2]]))
  # getNetwork(x)<-data.frame(cell_clustering=pheno.labels)
  #  head(getNetwork(x))
 ##plot phenograph
  #plot_clustering_heatmap_wrapper(myExperiment=x)

## -----------------------------------------------------------------------------
 ##store the ROIID in the metadata columns.
  ##access the unique cell labels.

  tail(getLabel(x))
  
  roi<-subsetCase(x,372149 )
  roi
  table(sapply(strsplit(getLabel(roi),"_"),function(x) x[1]))

## -----------------------------------------------------------------------------


  ##you can append more clinical features to the columns of the sampleDat data.frame.
  H<-imcExperimentToHyperFrame(imcExperiment=x,phenotypeToUse = 1)
  helper<-function(pp=NULL,i=NULL,j=NULL){
  ps<-split(pp)
  nnd<-nncross(ps[[i]],ps[[j]])
  }
 ## 1-NN analysis.
  ## compare cluster 10 to cluster 9
   #eachNND<-with(H,helper(pp=point,i="10",j="8"))
 ## first choose 2 clusters to compare.
 # sapply(eachNND,function(x) mean(x[,"dist"]))




## ----classChange,eval=FALSE,fig.width=9,fig.height=9,message=FALSE------------
#    library(CATALYST)
#  library(cowplot)
#  library(flowCore)
#  library(ggplot2)
#  library(SingleCellExperiment)
#  library(diffcyt)
#  
#   ## from IMCexperiment to SCE
#   sce<- SingleCellExperiment(x)
#   class(sce)
#  
#   ## FIX ME: add the SOM algorithms for over-clustering and meta-clustering by using class inheritance.
#  
#   #### for the cytof, the columns are sample info and the rows are cells. it uses the SummarizedExperiment format.
#  
#  
#  
#   ## change into a flowFrame?
#  
#   ## change into a daFrame (CATALYST)
#  
#   #data("data")
#  
#  
#   rd<-DataFrame(colData(imcdata))
#  
#    marker_info<-data.frame(channel_name=sapply(strsplit(rownames(imcdata),"_"),function(x) x[3]),
#                marker_name=rownames(imcdata),
#                marker_class=c(rep("type",13),
#                               rep("state",18),
#                               rep("none",13)))
#  
#    ## Switching into Summarized Experiment class.
#      dse<-SummarizedExperiment(assays=SimpleList(exprs=t(cellIntensity(imcdata))),
#                             rowData=rd,
#                             colData=DataFrame(marker_info)
#                             )
#     stopifnot(all(colnames(dse)==marker_info$marker_name))
#     dse
#  
#   # Transform data recommended
#   dse <- transformData(dse)
#  ## maybe look a the normalization.
#  # Generate clusters
#  dse <- (generateClusters(dse,meta_clustering=TRUE,meta_k=30,seed_clustering=828))
#  
#   ## examine each cluster.
#  cluster<-rowData(dse)
#   data<-data.frame(t(logcounts(imcdata)),cluster)
#   #plot_matrix_heatmap_wrapper(expr=data[,rownames(imcdata)],
#    #                               cell_clustering=data$cluster_id)
#   #
#  
#  

## ---- eval=FALSE,message=FALSE------------------------------------------------
#  
#  
#     ## the assay returns matrix class! required for CATALYST.
#    is(assay(imcdata,'counts'),'matrix')
#    rownames(imcdata)
#      # for plot scatter to work need to set the rowData feature in a specific way.
#     channel<-sapply(strsplit(rownames(imcdata),"_"),function(x) x[3])
#        channel[34:35]<-c("Ir1911","Ir1931")
#  
#     marker<-sapply(strsplit(rownames(imcdata),"_"),function(x) x[2])
#      rowData(imcdata)<-DataFrame(channel_name=channel,marker_name=marker)
#        rownames(imcdata)<-marker
#              plotScatter(imcdata,rownames(imcdata)[17:18],assay='counts')
#  
#    # convert to flowSet
#               ## the warning has to do with duplicated Iridium channels.
#     table(colData(imcdata)$ROIID)
#         (fsimc <- sce2fcs(imcdata, split_by = "ROIID"))
#      ## now we have a flowSet.
#     pData(fsimc)
#     fsApply(fsimc,nrow)
#     dim(exprs(fsimc[[1]]))
#     exprs(fsimc[[1]])[1:5,1:5]
#      ## set up the metadata files.
#     head(marker_info)
#  
#      exper_info<-data.frame(group_id=colData(imcdata)$Treatment[match(pData(fsimc)$name,colData(imcdata)$ROIID)],
#                             patient_id=colData(imcdata)$Patient.Number[match(pData(fsimc)$name,colData(imcdata)$ROIID)],
#                             sample_id=pData(fsimc)$name)
#  
#     ## create design
#     design<-createDesignMatrix(
#       exper_info,cols_design=c("group_id","patient_id"))
#  
#     ##set up contrast
#     contrast<-createContrast(c(0,1,0))
#     nrow(contrast)==ncol(design)
#     data.frame(parameters=colnames(design),contrast)
#  
#      ## flowSet to DiffCyt
#      out_DA<-diffcyt(
#        d_input=fsimc,
#        experiment_info=exper_info,
#        marker_info=marker_info,
#        design=design,
#        contrast=contrast,
#        analysis_type = "DA",
#        seed_clustering = 123
#      )
#     topTable(out_DA,format_vals = TRUE)
#  
#     out_DS<-diffcyt(
#       d_input=fsimc,
#       experiment_info=exper_info,
#       marker_info=marker_info,
#       design=design,
#       contrast=contrast,
#       analysis_type='DS',
#       seed_clustering = 123,
#       plot=FALSE)
#  
#        topTable(out_DS,format_vals = TRUE)
#  
#         ### from flowSet to SE.
#   d_se<-prepareData(fsimc,exper_info,marker_info)
#  
#  
#  

## ----diffcyt,eval=FALSE-------------------------------------------------------
#   library(diffcyt)
#  # Function to create random data (one sample)
#  d_random <- function(n = 20000, mean = 0, sd = 1, ncol = 20, cofactor = 5) {
#    d <- sinh(matrix(rnorm(n, mean, sd), ncol = ncol)) * cofactor
#    colnames(d) <- paste0("marker", sprintf("%02d", 1:ncol))
#    d
#  }
#  # Create random data (without differential signal)
#  set.seed(123)
#  d_input <- list(
#    sample1 = d_random(),
#    sample2 = d_random(),
#    sample3 = d_random(),
#    sample4 = d_random()
#  )
#  experiment_info <- data.frame(
#    sample_id = factor(paste0("sample", 1:4)),
#    group_id = factor(c("group1", "group1", "group2", "group2")),
#    stringsAsFactors = FALSE
#  )
#  marker_info <- data.frame(
#    channel_name = paste0("channel", sprintf("%03d", 1:20)),
#    marker_name = paste0("marker", sprintf("%02d", 1:20)),
#    marker_class = factor(c(rep("type", 10), rep("state", 10)),
#                          levels = c("type", "state", "none")),
#    stringsAsFactors = FALSE
#  )
#  # Prepare data
#  d_se <- prepareData(d_input, experiment_info, marker_info)
#  # Transform data
#  d_se <- transformData(d_se)
#  # Generate clusters
#  d_se <- generateClusters(d_se)

Try the imcExperiment package in your browser

Any scripts or data that you put into this service are public.

imcExperiment documentation built on Aug. 19, 2021, 5:07 p.m.