R/Run_family.R

Defines functions runVectorFields runPCAprojection runFullclustervalidation runFscore.mat runFscore

Documented in runFscore runFscore.mat runFullclustervalidation runPCAprojection runVectorFields

#' @title  run.fscore
#' @author Dieter Henrik Heiland
#' @description run.fscore
#' @inherit 
#' @return 
#' @examples 
#' 
#' @export
runFscore <- function(df){
  
  res.cluster <- names(df)
  
  mat <- matrix(0,length(res.cluster), length(res.cluster));colnames(mat)=rownames(mat) <- res.cluster
  mat <- mat %>% reshape2::melt()
  purrr::map(.x=1:nrow(mat), .f=function(x){
    mat[x,3]<<-FlowSOM::FMeasure(as.numeric(df[,res.cluster[mat[x,1]]]), as.numeric(df[,res.cluster[mat[x,2]]]))
  })
  mat.m <- mat %>% reshape2::dcast(Var1~Var2, value = "value")
  p <- corrplot::corrplot(mat.m[,-c(1)] %>% as.matrix(), method="circle")
  
  names(mat) <- c("Cluster.M_1","Cluster.M_2", "F-score" )
  
  return(p)
  
}
#' @title  run.fscore.mat
#' @author Dieter Henrik Heiland
#' @description run.fscore.mat
#' @inherit 
#' @return 
#' @examples 
#' 
#' @export
runFscore.mat <- function(df){
  
  res.cluster <- names(df)
  
  mat <- matrix(0,length(res.cluster), length(res.cluster));colnames(mat)=rownames(mat) <- res.cluster
  mat <- mat %>% reshape2::melt()
  purrr::map(.x=1:nrow(mat), .f=function(x){
    mat[x,3]<<-FlowSOM::FMeasure(as.numeric(df[,res.cluster[mat[x,1]]]), as.numeric(df[,res.cluster[mat[x,2]]]))
  })
  mat.m <- mat %>% reshape2::dcast(Var1~Var2, value = "value")
  names(mat) <- c("Cluster.M_1","Cluster.M_2", "F-score" )
  
  return(mat)
  
}
#' @title  run.full.cluster.validation (for Spatial Paper Beta Version!!!!!)
#' @author Dieter Henrik Heiland
#' @description run.full.cluster.validation
#' @inherit 
#' @return 
#' @examples 
#' 
#' @export
runFullclustervalidation <- function(seuratObj, plot.folder){
  
  require(Seurat)
  
  #Subset Tumor
  tumor <- seuratObj@meta.data %>% filter(cell.type=="Tumor") %>% rownames()
  seuratObj <- subset(seuratObj, cells=tumor)
  
  seuratObj <- 
    seuratObj %>% 
    SCTransform(assay="Spatial") %>% 
    RunPCA() %>% 
    RunUMAP(dims = 1:10)
  
  
  seuratObj <- seuratObj %>% run.SNN.stability(assay="SCT", 
                                               reduction="pca",
                                               dims = 1:30, 
                                               resolution = seq(from=0.1, to=1.5, by=0.05), 
                                               cluster_id="Louvain", algorithm=1)
  
  seuratObj <- seuratObj %>% run.SNN.stability(assay="SCT", reduction="pca",
                                               resolution = seq(from=0.1, to=1.5, by=0.05), 
                                               dims = 1:30, cluster_id="SLM",algorithm=3)
  
  if(dir.exists(plot.folder)==F){dir.create(plot.folder)}
  
  
  
  ggsave(Seurat::DimPlot(seuratObj, group.by = "Louvain"),
         filename=paste0(plot.folder, "/DimPlot_cluster_State1_Louvain.png"))
  
  ggsave(Seurat::DimPlot(seuratObj, group.by = "SLM"),
         filename=paste0(plot.folder, "/DimPlot_cluster_State1_SLM.png"))
  
  
  
  ##Add data to matrix
  #Create DF for all clusters
  
  # PAM Cluster
  
  k=length(unique(seuratObj@meta.data$Louvain))
  
  pca <- seuratObj %>% Seurat::Reductions("pca")
  pca <- pca@cell.embeddings %>% as.data.frame()
  pca <- pca[,1:30]
  pam_PCA <- cluster::pam(pca, k=k)
  
  pca <- seuratObj %>% Seurat::Reductions("pca")
  pca <- pca@cell.embeddings %>% as.data.frame()
  pca <- pca[,1:30]
  HC_PCA <- hclust(dist(pca))
  HC_PCA <- cutree(HC_PCA, k=k)
  
  
  
  cluster_summary=data.frame(barcodes=seuratObj@meta.data %>% rownames(), 
                             PCA_1=c(as.numeric(seuratObj@meta.data$Louvain)+1),
                             PCA_2=c(as.numeric(seuratObj@meta.data$SLM)+1),
                             PCA_4=pam_PCA$clustering %>% as.numeric(),
                             PCA_5=HC_PCA %>% as.numeric())
  
  write.csv(cluster_summary, file=paste0(plot.folder, "/cluster_summary.csv"))
  
  #ggsave(run.fscore(cluster_summary[-c(1)]),
  #       filename=paste0(plot.folder, "/Fscore.png"))
  
  write.csv(run.fscore.mat(cluster_summary[-c(1)]), file=paste0(plot.folder, "/Fscore.csv"))
  
  
  prefix = "PCA_"
  out <- clustree::clustree(cluster_summary, 
                            prefix = prefix)
  
  out.plot <- out$data %>% 
    select(!!sym(prefix), sc3_stability) %>% 
    group_by(!!sym(prefix)) %>% 
    summarise(mean(sc3_stability), sd(sc3_stability)) %>% 
    as.data.frame()
  
  
  ## Validate stabolity
  best.cluster <- which.max(out.plot$`mean(sc3_stability)`)
  
  cluster_summary_UMAP=data.frame(barcodes=seuratObj@meta.data %>% rownames(), 
                                  PCA_1=c(as.numeric(seuratObj@meta.data$Louvain)+1),
                                  PCA_2=c(as.numeric(seuratObj@meta.data$SLM)+1),
                                  PCA_3=pam_PCA$clustering %>% as.numeric(),
                                  UMAP1=seuratObj@reductions$umap@cell.embeddings[,1],
                                  UMAP2=seuratObj@reductions$umap@cell.embeddings[,2])
  
  ggsave(clustree::clustree_overlay(cluster_summary_UMAP, 
                                    prefix = prefix, 
                                    x_value = "UMAP1", 
                                    y_value = "UMAP2"),
         filename=paste0(plot.folder, "/ClusterTree_UMAP.png"))
  
  
  
  
  
  # Merge Clusters together
  stable <- 
    cluster_summary_UMAP %>% 
    mutate(stable = PCA_1+PCA_2+PCA_3 ) %>% 
    count(stable) %>% 
    filter(n>200) %>% 
    pull(stable)
  
  # Select stable clusters
  cluster_summary_UMAP <- 
    cluster_summary_UMAP %>% 
    mutate(stable = PCA_1+PCA_2+PCA_3 ) %>% 
    filter(stable %in% {{stable}})
  
  plot <- clustree::clustree_overlay(cluster_summary_UMAP, 
                                     prefix = prefix, 
                                     x_value = "UMAP1", 
                                     y_value = "UMAP2")
  
  ggsave(plot,
         filename=paste0(plot.folder, "/ClusterTree_UMAP_stable.png"))
  
  
  
  for(i in 1:length(unique(cluster_summary_UMAP$stable))){cluster_summary_UMAP$stable[cluster_summary_UMAP$stable==unique(cluster_summary_UMAP$stable)[i]] <- i }
  
  #ggplot(data=cluster_summary_UMAP, aes(x=UMAP1, y=UMAP2, color=as.factor(stable)))+geom_point()+theme_classic()
  
  ## Add consensus clusers to seurat
  
  seuratObj <- subset(seuratObj, cells=cluster_summary_UMAP$barcodes)
  seuratObj <- 
    seuratObj %>% 
    SCTransform(assay="Spatial") %>% 
    RunPCA() %>% 
    RunUMAP(dims = 1:10)
  
  seuratObj@meta.data <- 
    seuratObj@meta.data[cluster_summary_UMAP$barcodes, ] %>% 
    mutate(consensus=cluster_summary_UMAP$stable)
  
  ggsave(Seurat::DimPlot(seuratObj, reduction = "pca", group.by = "consensus" ),
         filename=paste0(plot.folder, "/consensus_PCA.png"))
  ggsave(Seurat::DimPlot(seuratObj, reduction = "umap", group.by = "consensus" ),
         filename=paste0(plot.folder, "/consensus_UMAP.png"))
  
  
  seuratObj <- SetIdent(seuratObj, value=seuratObj@meta.data$consensus)
  
  markers <- FindAllMarkers(seuratObj)
  
  final.clusters <- markers %>% count(cluster) %>% filter(n>50) %>% pull(cluster)
  markers.final <- markers %>% filter(cluster %in% final.clusters)
  
  # Remove duplicated genes 
  markers.final$rows <- 1:nrow(markers.final)
  dup <- markers.final[duplicated(markers.final$gene), ]$gene
  remove <- purrr::map(.x=dup, .f=function(i){
    keep <- markers.final %>% 
      filter(gene=={{i}}) %>% 
      arrange(desc(avg_logFC)) %>% 
      head(1) %>% 
      pull(rows)
    
    remove <- markers.final %>% 
      filter(gene=={{i}}) %>% 
      filter(rows!={{keep}}) %>% 
      pull(rows)
    
    return(remove)
    
  }) %>% unlist()
  markers.final <- markers.final[-c(remove), ]
  final.clusters <- markers.final %>% count(cluster) %>% filter(n>20) %>% pull(cluster)
  markers.final <- markers.final %>% filter(cluster %in% final.clusters)
  rownames(markers.final) <- markers.final$gene
  
  
  ## Add new cluster to seurat
  cluster <- as.numeric(unique(markers.final$cluster))
  bc <- 
    seuratObj@meta.data[seuratObj@meta.data$consensus %in% cluster, ] %>% rownames()
  
  seuratObj <- subset(seuratObj, cells=bc)
  seuratObj <- 
    seuratObj %>% 
    SCTransform(assay="SCT") %>% 
    RunPCA() %>% 
    RunUMAP(dims = 1:10)
  
  ggsave(Seurat::DimPlot(seuratObj, reduction = "pca", group.by = "consensus" ),
         filename=paste0(plot.folder, "/consensus_PCA_final.png"))
  ggsave(Seurat::DimPlot(seuratObj, reduction = "umap", group.by = "consensus" ),
         filename=paste0(plot.folder, "/consensus_UMAP_final.png"))
  
  
  
  return(list(seuratObj,markers.final ))
}


#' @title  run.full.cluster.validation (for Spatial Paper Beta Version!!!!!)
#' @author Dieter Henrik Heiland
#' @description run.full.cluster.validation
#' @inherit 
#' @return 
#' @examples 
#' @export
runPCAprojection <- function(PCA,
                             matrix.input, 
                             matrix.transfer, 
                             Dim=2, 
                             var=400, 
                             epochs=50){
  
  note=function(text){return(print(message(paste0(text))))}
  
  
  scaled_org <- matrix.input
  scales_target <- matrix.transfer
  
  #######
  note(c("Start Analysis using ", Dim, " Dimensions"))
  #######
  
  #Train Model for PCA coordinates using a bidirectional RNN
  
  require(keras)
  require(tensorflow)
  require(tfdatasets)
  require(tidyverse)
  
  
  #dim(scaled_org); dim(scaled_target)
  
  # 1. Set sup the data set 
  inter <- generics::intersect(rownames(scaled_org), base::rownames(scaled_target))
  scaled_org <- scaled_org[inter, ]
  scaled_target <- scaled_target[inter, ]
  
  
  
  
  # 2. Select most var Features
  variance <- 
    base::data.frame(var=apply(scaled_org,1, var)) %>% 
    tibble::rownames_to_column("gene") %>% 
    dplyr::arrange(desc(var)) %>% 
    utils::head(var) %>% 
    dplyr::pull(gene)
  
  # 3. Prepare data 
  x_train <- base::scale(t(scaled_org[variance, ]))
  y_train <- PCA[,1:Dim]
  dim(y_train); dim(x_train)
  
  #plot(x_train[,2])
  
  x_test <- t(scaled_target[variance, ])
  dim(x_test)
  
  # 4. Run prediction 
  
  colnames=base::colnames(x_train)
  
  test_df <- x_test %>% 
    tibble::as_tibble(.name_repair = "minimal") %>% 
    stats::setNames(paste0("Gene_", 1:var))
  
  
  Predictions <- purrr::map(.x=1:Dim, .f=function(x){
    
    message(paste0(Sys.time(),"   Start Predict Dimension: ", "PC_", x ))
    
    train_df <- x_train %>% 
      tibble::as_tibble(.name_repair = "minimal") %>% 
      stats::setNames(paste0("Gene_", 1:var)) %>%
      dplyr::mutate(label = y_train[,x])
    
    spec <- feature_spec(train_df, label ~ . ) %>% 
      tfdatasets::step_numeric_column(all_numeric(), normalizer_fn = tfdatasets::scaler_standard()) %>% 
      tfdatasets::fit()
    
    input <- tfdatasets::layer_input_from_dataset(train_df %>% dplyr::select(-label))
    
    output <- input %>% 
      keras::layer_dense_features(tfdatasets::dense_features(spec)) %>% 
      keras::layer_dense(units = 64, activation = "relu") %>%
      keras::layer_dense(units = 32, activation = "relu") %>%
      keras::layer_dense(units = 1) 
    model <- 
      keras::keras_model(input, output) %>% 
      keras::compile(
        loss = "mse",
        optimizer = optimizer_rmsprop(),
        metrics = list("mean_absolute_error")
        )
    
    
    history <- 
      model %>% 
      keras::fit(
        x = train_df %>% dplyr::select(-label),
        y = train_df$label,
        epochs = epochs,
        validation_split = 0.2,
        verbose = 0
    )
    
    test_predictions <- model %>% predict(test_df)
    
    out <- data.frame(PC=test_predictions)
    names(out) <- paste0("PC_",x)
    
    return(out)
    
  })
  
  df_plot <- as.data.frame(do.call(cbind, Predictions))
  
  return(df_plot)
  
  
  
}



#' @title  runSpatialRegression
#' @author Dieter Henrik Heiland
#' @description runSpatialRegression
#' @param object SPATA2 object
#' @param features Character of all factors to be correlated (features/genes/GeneSets)
#' @param model The model to be used: classical:Pearson Correlation; lmSLX, lagsarlm, errorsarlm, CCA or dist
#' @param smooth Logical, if TRUE spatial smooth values
#' @param normalize Logical, if TRUE normalize values
#' @inherit 
#' @return 
#' @examples 
#' @export
runSpatialRegression <- function (object, features, model, smooth = F, normalize = F) {
  color_var <- SPATA2::hlpr_join_with_aes(object, df = SPATA2::getCoordsDf(object), 
                                          color_by = features, normalize = normalize, smooth = smooth)
  coords <- color_var[, c("x", "y")] %>% as.data.frame()
  data <- color_var[, features] %>% as.data.frame()
  rownames(coords) = rownames(data) <- color_var$barcodes
  sp_obj <- sp::SpatialPointsDataFrame(coords = coords, data = data)
  nc <- spdep::knearneigh(sp_obj, k = 6, longlat = T)
  nc.nb <- spdep::knn2nb(nc)
  nb.list <- spdep::nb2listw(nc.nb)
  nb.mat <- spdep::nb2mat(nc.nb)
  runCCADist <- function(object, color_var, a, b) {
    sample <- SPATA2::getSampleNames(object)
    data <- color_var[, c(a, b)] %>% as.data.frame()
    names(data) <- c("a", "b")
    rownames(data) <- color_var$barcodes
    coord_spots <- object %>% SPATA2::getCoordsDf()
    coord_spots <- coord_spots[, c("row", "col")] %>% as.data.frame()
    rownames(coord_spots) <- object %>% SPATA2::getCoordsDf() %>% 
      pull(barcodes)
    coord_spots <- cbind(coord_spots, data[rownames(coord_spots), 
    ])
    coord_spots2 <- coord_spots
    coord_spots2$a <- coord_spots2$b * c(-1)
    getKernel <- function(coord_spots, var) {
      mat <- reshape2::acast(row ~ col, data = coord_spots, 
                             value.var = var)
      mat[is.na(mat)] <- 0
      mat %>% scales::rescale(., c(0, 1)) %>% oce::matrixSmooth()
    }
    real <- proxy::dist(x = getKernel(coord_spots, "a"), 
                        y = getKernel(coord_spots, "b"))
    X <- getKernel(coord_spots, "a")
    Y <- getKernel(coord_spots, "b")
    cca <- CCA::matcor(X, Y)
    return(c(cca$XYcor[!is.na(cca$XYcor)] %>% mean(), mean(na.omit(as.vector(real)))))
  }
  if (model == "classical") {
    mat.cor <- cor(data)
    return(mat.cor)
  }
  if (model == "lmSLX") {
    mat.cor <- matrix(NA, length(features), length(features))
    rownames(mat.cor) = colnames(mat.cor) <- features
    for (i in 1:length(features)) {
      for (j in 1:length(features)) {
        first_feat <- features[i]
        second_feat <- features[j]
        if (first_feat == second_feat) {
          mat.cor[first_feat, second_feat] = 0
        }
        else {
          formula <- as.formula(paste(first_feat, "~", 
                                      second_feat))
          reg1 <- lm(formula, data = sp_obj)
          reg2 = spatialreg::lmSLX(reg1, data = sp_obj, 
                                   nb.list)
          sum_mod_2 <- summary(reg2)
          cor_lag2 <- sum_mod_2$coefficients[3, 1]
          mat.cor[first_feat, second_feat] = as.numeric(cor_lag2)
        }
      }
    }
    return(mat.cor)
  }
  if (model == "lagsarlm") {
    if (length(features) >= 2) 
      stop("Run time to long, reduce number of features")
    first_feat <- features[1]
    second_feat <- paste(features[2:length(features)], collapse = "+")
    formula <- as.formula(paste(first_feat, "~", second_feat))
    reg1 <- lm(formula, data = sp_obj)
    sum_mod_1 <- summary(reg1)
    cor_lag1 <- sum_mod_1$coefficients[2, ]
    reg3 = spatialreg::lagsarlm(reg1, data = sp_obj, nb.list)
    sum_mod_3 <- summary(reg3)
    mat.cor <- data.frame(estimate = as.numeric(sum_mod_3$coefficients[2]), 
                          p.value = as.numeric(sum_mod_3$Wald1$p.value))
  }
  if (model == "errorsarlm") {
    if (length(features) >= 2) 
      stop("Run time to long, reduce number of features")
    first_feat <- features[1]
    second_feat <- paste(features[2:length(features)], collapse = "+")
    formula <- as.formula(paste(first_feat, "~", second_feat))
    reg1 <- lm(formula, data = sp_obj)
    sum_mod_1 <- summary(reg1)
    cor_lag1 <- sum_mod_1$coefficients[2, ]
    reg4 = spatialreg::errorsarlm(reg1, data = sp_obj, nb.list)
    sum_mod_4 <- summary(reg4)
    mat.cor <- data.frame(estimate = as.numeric(sum_mod_4$coefficients[2]), 
                          p.value = as.numeric(sum_mod_4$Wald1$p.value))
  }
  if (model == "CCA") {
    mat.cor <- matrix(NA, length(features), length(features))
    rownames(mat.cor) = colnames(mat.cor) <- features
    for (i in features) {
      for (j in features) {
        if (i == j) {
          mat.cor[i, j] <- 1
        }
        else {
          mat.cor[i, j] <- runCCADist(object, a = i, 
                                      b = j, color_var = color_var)[1]
        }
      }
    }
    return(mat.cor)
  }
  if (model == "dist") {
    mat.cor <- matrix(NA, length(features), length(features))
    rownames(mat.cor) = colnames(mat.cor) <- features
    for (i in features) {
      for (j in features) {
        if (i == j) {
          mat.cor[i, j] <- 0
        }
        else {
          mat.cor[i, j] <- runCCADist(object, a = i, 
                                      b = j, color_var = color_var)[2]
        }
      }
    }
    return(mat.cor)
  }
  return(mat.cor)
}



#' @title  runVectorFields
#' @author Dieter Henrik Heiland
#' @description The function computed teh vector direction of gene expression gradients along a given feature
#' @param object SPATA2 object
#' @param features Character value. Can be a numeric feature from the feature data, a gene or a pathway
#' @param dist.spot The distance for compute the vector (min is the distance of spots)
#' @param run.mcor Logical, if TRUE use feature multicore
#' @param workers Cores to use for multicore
#' @param ram Numeric, GB of ram storage used for multicore
#' @param smooth Logical. If TRUE, a loess fit is used to smooth the values.
#' @param smooth_span Numeric value. Controls the degree of smoothing. Given to argument span of stats::loess().
#' @param normalize Normalize Values 
#' @param cut_off Numeric, cut off low expression spots
#' @inherit 
#' @return 
#' @examples 
#' @export
runVectorFields <- function(object, 
                            features,
                            cut_off=NULL,
                            normalize=T,
                            smooth=T,
                            smooth_span=NULL,
                            dist.spot=10, 
                            run.mcor=T, 
                            workers=8,
                            ram=50){
  
  
  # Get the data:
  df <- SPATA2::hlpr_join_with_aes(object, 
                                   df = SPATA2::getCoordsDf(object), 
                                   color_by = features, 
                                   normalize = normalize, 
                                   smooth = smooth,
                                   smooth_span=smooth_span)
  
  if(!is.null(cut_off)){df[df[,features]<cut_off, features]=0}
  
  
  # Prepare data ------------------------------------------------------------
  NN.file <- SPATAwrappers::getSurroundedSpots(object)
  
  
  if(run.mcor==T){
    base::options(future.fork.enable = TRUE)
    future::plan("multisession", workers = workers)
    future::supportsMulticore()
    base::options(future.globals.maxSize = ram *100* 1024^2)
    message("... Run multicore ... ")
    
  }
  
  if(dist.spot<min(NN.file %>% filter(distance!=0) %>% pull(distance))){
    dist.spot <-min(NN.file %>% filter(distance!=0) %>% pull(distance))
    message(paste0("The distance was adopted for the minimal distance: ",dist.spot, "px"))}
  
  
  VF <- furrr::future_map(.x=1:nrow(df), .f=function(i){
    
    #Spot Def.
    bc <- df[i, c("barcodes")]
    cc <- df[i, c("x", "y")]
    #Neighbour Spots
    NN <- 
      NN.file %>% 
      dplyr::filter(xo < cc$x+dist.spot & xo > cc$x-dist.spot) %>% 
      dplyr::filter(yo < cc$y+dist.spot & yo > cc$y-dist.spot) %>%
      dplyr::pull(bc_destination)
    
    #Filter input DF
    NN.df <- df %>% dplyr::filter(barcodes %in% NN) %>% as.data.frame()
    parameter <- features
    
    #Create Vector
    V <- -c(as.numeric(cc) - c(NN.df$x[which.max(NN.df[,parameter])], NN.df$y[which.max(NN.df[,parameter])]))
    
    if(length(V)==0){out <- data.frame(barcodes=bc, t.x=0, t.y=0)}else{out <- data.frame(barcodes=bc, t.x=V[1], t.y=V[2])}
    
    return(out)
    
    
    
  }, .progress = T) %>% 
    do.call(rbind, .) %>% 
    as.data.frame()
  
  
  out <- cbind(df, VF)
  out[is.na(out)] <- 0
  
  return(out)
  
  
  
  
}
heilandd/SPATAwrappers documentation built on Oct. 2, 2022, 1:40 p.m.