# R/execFunctions.R

# Defines functions: get.expression.slice, select.closest.AP, append.cluster.to.spots.table, name.clusters

## ----------- Expression -----------

#Returning spots with coordinates, expression level and color scale
get.expression.slice <- function(gene, spots.table, st.data, min.expr, log.scale, normalized.scale, slice_index, rbPal){

  #gene(str): gene of interest, used as a column name of st.data
  #spots.table: one row per spot; the columns slice_index, ML, DV, AP, acronym, x and y are read
  #st.data: expression values, indexed as st.data[spot row names, gene]
  #min.expr(float): strictly between 0 and 1, percentile of expression computed within each slice
  # (spots strictly above it are kept). 1 or above: cutoff value (spots at or above it are kept).
  # 0 or below: no filtering
  #log.scale(bool): using a logarithmic scale
  #normalized.scale(bool): normalized expression by peak expression across the brain (0 to 100)
  #slice_index(char): vector of slices indexes
  #rbPal(func): palette function from RColorBrewer, called as rbPal(n) to get n colors
  #
  #Returns a list with one data frame per requested slice (coordinates, acronym, 'expr' and 'col'),
  # followed by two numbers: the maximum and the minimum of the scale. They are 100 and 0 when
  # normalized.scale is TRUE, otherwise the range of the last slice (NA if there is no slice or
  # if the last slice has no spot left).

  kept.columns <- c('ML','DV','AP','acronym','x','y')

  #These values only depend on the gene, not on the slice: computed once
  gene.expr <- st.data[,gene]

  #Case with negative values: everything is shifted so that the lowest expression is 0
  if (min(gene.expr) < 0){
    offset <- -min(gene.expr)
  }else{
    offset <- 0
  }
  peak.expr <- max(gene.expr) + offset

  dataset <- vector('list', length(slice_index))
  m.val <- NA_real_
  min.val <- NA_real_

  #seq_along: no iteration at all when no slice is requested
  for (i in seq_along(slice_index)){

    #Selecting appropriate spots
    spots <- spots.table[spots.table$slice_index == slice_index[i], kept.columns]

    #Extracting the expression of gene of interest and adding the offset
    spots$expr <- st.data[rownames(spots),gene] + offset

    #Discarding spots with low expression
    if (min.expr >= 1){
      spots <- spots[spots$expr >= (min.expr + offset),]
    }else if (min.expr > 0){
      spots <- spots[spots$expr > as.numeric(quantile(spots$expr,min.expr)),]
    }

    if (normalized.scale){
      spots$expr <- (spots$expr / peak.expr)*100
    }

    #Range of the slice, taken before the logarithmic transformation
    if (nrow(spots) > 0){
      m.val <- max(spots$expr) - offset
      min.val <- min(spots$expr) - offset
    }else{
      m.val <- NA_real_
      min.val <- NA_real_
    }

    if (log.scale){
      #Values are floored so that the lowest color index is 0 after the transformation
      spots$expr[spots$expr < 0.98] <- 0.98
      spots$expr <- ceiling(5*log(spots$expr + 0.01))
    }

    #This adds a column of color values based on the expression values
    if (normalized.scale){
      if (log.scale){
        palette.size <- ceiling(5*log(101))+1
      }else{
        palette.size <- 101
      }
      spots$col <- rbPal(palette.size)[(as.numeric(spots$expr)+1)]
      m.val <- 100
      min.val <- 0
    }else if (nrow(spots) > 0){
      spots$col <- rbPal(ceiling(max(spots$expr)+1))[(ceiling(as.numeric(spots$expr)+1))]
    }else{
      #No spot left: a palette cannot be sized on an empty range
      spots$col <- character(0)
    }

    dataset[[i]] <- spots
  }

  dataset[[(length(dataset)+1)]] <- m.val
  dataset[[(length(dataset)+1)]] <- min.val
  return(dataset)
}

## ----------- Selection -----------

#Picks, among the AP values present in spots.table, the one nearest to the requested AP
select.closest.AP <- function(AP, spots.table){
  #AP(float): desired AP
  #spots.table: its AP column provides the candidate values
  #
  #When two candidates are equally distant, the first one met in spots.table is returned

  candidates <- unique(spots.table$AP)
  nearest <- which.min(abs(candidates - AP))
  candidates[nearest]
}

## -----------  Clusters -----------

#Appending clusters from a cluster file
append.cluster.to.spots.table <- function(spots.table, cluster.list){

  #spots.table: one row per spot, row names are the spot IDs
  #cluster.list(str): path to a file read with read.table (no header); the first column holds
  # the spot IDs and the second one the numeric cluster ID of each spot
  #
  #Returns spots.table restricted to the spots listed in the file, in file order, with a factor
  # column 'cluster'. Spots whose cluster is NA in the file get cluster -1. Previous 'cluster'
  # and 'clusters.named' columns are discarded. Spot IDs of the file that are not row names of
  # spots.table are ignored with a warning.

  cluster.file <- read.table(cluster.list,stringsAsFactors = FALSE)

  #IDs are always matched against row names, never used as row positions
  spot.ids <- as.character(cluster.file[,1])
  cluster.ids <- cluster.file[,2]

  #Unknown IDs would create all-NA rows when subsetting, and extra rows when assigning
  known <- spot.ids %in% rownames(spots.table)
  if (!all(known)){
    warning(sprintf('%d spot(s) of the cluster file are not in spots.table and were ignored', sum(!known)),
            call. = FALSE)
    spot.ids <- spot.ids[known]
    cluster.ids <- cluster.ids[known]
  }

  #Spots without an assigned cluster are gathered in cluster -1
  cluster.ids[is.na(cluster.ids)] <- -1

  #drop = FALSE keeps a data frame even if spots.table has a single column
  spots.table <- spots.table[spot.ids, , drop = FALSE]
  spots.table$cluster <- NULL
  spots.table$clusters.named <- NULL

  #Rows were just selected in file order, so the cluster IDs line up by position
  spots.table$cluster <- factor(as.numeric(cluster.ids))

  return(spots.table)
}

#Name the clusters
name.clusters <- function(spots.table){

  #spots.table: one row per spot; the columns 'cluster' (cluster of the spot) and
  # 'full.name.parent' (name of the parent region of the spot) are read
  #
  #Returns spots.table with a factor column 'clusters.named'. A cluster is named after its most
  # frequent parent region, or 'Mixed' when that region covers less than 50% of its spots.
  # When several clusters share a name, they are numbered ('name-01', 'name-02', ...) in the
  # order of the cluster levels. Levels of 'clusters.named' are sorted case-insensitively.

  #factor() drops the levels without any spot and also accepts non-factor cluster labels
  clusters <- factor(spots.table$cluster)
  regions <- as.character(spots.table$full.name.parent)

  #Name of the most frequent region of a cluster, 'Mixed' if there is no region above 50%
  dominant.region <- function(cluster.id){
    cluster.regions <- regions[which(clusters == cluster.id)]

    #Spots without region are not counted as a region but still weigh in the percentage
    region.counts <- table(cluster.regions)
    if (length(region.counts) == 0){
      return('Mixed')
    }

    #which.max returns the first of the most frequent regions
    top <- which.max(region.counts)
    percent <- region.counts[[top]] / length(cluster.regions) * 100
    if (percent < 50){
      return('Mixed')
    }
    names(region.counts)[top]
  }

  base.names <- vapply(levels(clusters), dominant.region, character(1), USE.NAMES = FALSE)

  #Numbering the clusters sharing the same name, names used once are kept as such
  cluster.names <- base.names
  for (n in unique(base.names)){
    l <- which(base.names == n)
    if (length(l) > 1){
      cluster.names[l] <- sprintf('%s-%02d', n, seq_along(l))
    }
  }

  #cluster.names follows the order of the levels: renaming the levels renames every spot
  named <- clusters
  levels(named) <- cluster.names

  sorted.levels <- levels(named)[order(tolower(levels(named)))]
  spots.table$clusters.named <- factor(named, sorted.levels)

  return(spots.table)
}
# cantin-ortiz/stExpressionViewer documentation built on May 29, 2019, 11:02 p.m.