# ##This file contains all the function used in the shiny application, grouped in sections.
# ##Many functions remain untouched from the original code found on this link:
# ##https://github.com/nolanlab/scaffold/tree/multiFilesClustering
# ##They are not commented.
# ##The new functions are on the top of their respective sections.
# ##OPTIONS
# options(stringsAsFactors = F)
# ##0. GLOBAL FUNCTIONS ----------------
# enrich.FCS.CIPHE <- function(original, new.column)
# {
# new_p <- parameters(original)[1,]
# ## Now, let's change it's name from $P1 to $P26 (or whatever the next new number is)
# new_p_number <- as.integer(dim(original)[2]+1)
# rownames(new_p) <- c(paste0("$P", new_p_number))
# ## Now, let's combine the original parameter with the new parameter
# library('BiocGenerics') ## for the combine function
# allPars <- combine(parameters(original), new_p)
# ## Fix the name and description of the newly added parameter, say we want to be calling it cluster_id
# new_p_name <- colnames(new.column)
# allPars@data$name[new_p_number] <- new_p_name
# allPars@data$desc[new_p_number] <- new_p_name
# new_exprs <- cbind(original@exprs, new.column)
# new_kw <- original@description
# new_kw["$PAR"] <- as.character(new_p_number)
# new_kw[paste0("$P",as.character(new_p_number),"N")] <- new_p_name
# new_kw[paste0("$P",as.character(new_p_number),"S")] <- new_p_name
# new_kw[paste0("$P",as.character(new_p_number),"E")] <- "0,0"
# new_kw[paste0("$P",as.character(new_p_number),"G")] <- "1"
# new_kw[paste0("$P",as.character(new_p_number),"B")] <- new_kw["$P1B"]
# new_kw[paste0("$P",as.character(new_p_number),"R")] <- new_kw["$P1R"]
# new_kw[paste0("flowCore_$P",as.character(new_p_number),"Rmin")] <- new_kw["flowCore_$P1Rmin"]
# new_kw[paste0("flowCore_$P",as.character(new_p_number),"Rmax")] <- new_kw["flowCore_$P1Rmax"]
# ## Now, let's just combine it into a new flowFrame
# new_fcs <- new("flowFrame", exprs=new_exprs, parameters=allPars, description=new_kw)
# return(new_fcs)
# }
# #Depending on the OS different functions are to choose a directory because no one is currently crossplatform.
# chooseDir <- function() {
# OS <- Sys.info()["sysname"]
# if (OS=="Windows") {
# Dir <-
# choose.dir(default = "", caption = "Select a Folder for saving:")
# }
# else if (OS=="Linux") {
# Dir <- tk_choose.dir(default = "", caption = "Select a Folder for saving:")
# }
# else {
# Dir <- choose.mac.dir()
# }
# return(Dir)
# }
# #Function used to select a folder via interface on a mac OS system.
# choose.mac.dir <- function() {
# system("osascript -e 'tell app \"R\" to POSIX path of (choose folder with prompt \"Select a Folder for saving:\")' > /tmp/R_folder",
# intern = FALSE, ignore.stderr = TRUE)
# p <- system("cat /tmp/R_folder && rm -f /tmp/R_folder", intern = TRUE)
# return(ifelse(length(p), p, NA))
# }
# get_cluster_groups_table <- function(v, key) {
# tags$table(class = "table table-hover table-striped",
# tags$tr(tags$td(
# v[1],
# tags$button(class = "btn btn-xs btn-warning pull-right", onClick = sprintf("Shiny.onInputChange('clusteringui_remove_clustering_group', {'key':'%s', 'x':Math.random()})", key),
# tags$span(class = "glyphicon glyphicon-trash")
# )
# )),
# ifelse(length(v > 1),
# tagList(lapply(tail(v, n = -1), function(x) {tags$tr(tags$td(x))})),
# tagList()
# )
# )
# }
# returnOrder <- function(inputId, vars) {
# tagList(
# shiny::singleton(tags$head(tags$script(src = 'sort.js'))),
# shiny::singleton(tags$head(tags$link(rel = 'stylesheet', type = 'text/css', href = 'sort.css'))),
# shiny::HTML(html_list(vars, inputId)),
# tags$script(paste0("$(function() {$( '#",inputId,"' ).sortable({placeholder: 'ui-state-highlight'}); $( '#",inputId,"' ).disableSelection(); });"))
# )
# }
# updateReturnOrder <- function(session, inputId, vars)
# {
# session$sendInputMessage(inputId, list(value = vars))
# }
# html_list <- function(vars, id) {
# hl <- paste0("<ul id=\'",id,"\' class='stab'>")
# for(i in vars) hl <- paste0(hl, "<li class='ui-state-default stab'><span class='label'>",i,"</span></li>")
# paste0(hl, "</ul>")
# }
# my_load <- function(f_name)
# {
# con <- file(f_name, "rb")
# retval <- unserialize(con)
# close(con)
# return(retval)
# }
# my_save <- function(obj, f_name)
# {
# con <- file(f_name, "wb")
# serialize(obj, con, ascii = F)
# close(con)
# }
# logiclTransformCIPHE <- function(flow.frame, value = NULL, markers = NULL)
# {
# print(markers)
# if(is.null(markers) || length(markers)<1){
# if(is.null(flow.frame@description[["SPILL"]])){
# markers.transform <- colnames(flow.frame)
# } else {
# markers.transform <- colnames(flow.frame@description[["SPILL"]])
# }
# } else {
# markers.transform <- markers
# }
# list.index <- names(unlist(lapply(markers.transform, function(x) return(which(flow.frame@description==x)))))
# list.index <- gsub("N","", list.index)
# list.index <- gsub("\\$P","", list.index)
# if(is.null(value)){
# if(!is.null(flow.frame@description[[paste0("$P",list.index[1],"MS")]])){
# r.values <- unlist(lapply(list.index, function(x)
# as.integer(flow.frame@description[[paste0("$P",x,"MS")]]))
# )
# } else if(!is.null(flow.frame@description[[paste0("P",list.index[1],"MS")]])) {
# r.values <- unlist(lapply(list.index, function(x)
# as.integer(flow.frame@description[[paste0("P",x,"MS")]]))
# )
# } else {
# r.values <- rep(90, length(list.index))
# }
# }
# else {
# r.values <- rep(value, length(list.index))
# }
# w.values <- (4.5-log10(262143/abs(r.values)))/2
# w.values[which(w.values<0)] <- 0.5
# w.values[which(is.infinite(w.values))] <- 0.5
# for(t in 1:length(markers.transform)){
# lgcl <- logicleTransform(w=w.values[t])
# flow.frame <- transform(flow.frame, transformList(markers.transform[t],lgcl))
# }
# return(flow.frame)
# }
# inversLogiclTransformCIPHE <- function(flow.frame, value = NULL, markers = NULL)
# {
# if(is.null(markers)){
# if(is.null(flow.frame@description[["SPILL"]])){
# markers.transform <- colnames(flow.frame)
# } else {
# markers.transform <- colnames(flow.frame@description[["SPILL"]])
# }
# } else {
# markers.transform <- markers
# }
# list.index <- names(unlist(lapply(markers.transform, function(x) return(which(flow.frame@description==x)))))
# list.index <- gsub("N","", list.index)
# list.index <- gsub("\\$P","", list.index)
# if(is.null(value)){
# if(!is.null(flow.frame@description[[paste0("$P",list.index[1],"MS")]])) {
# r.values <- unlist(lapply(list.index, function(x)
# as.integer(flow.frame@description[[paste0("$P",x,"MS")]]))
# )
# } else if(!is.null(flow.frame@description[[paste0("P",list.index[1],"MS")]])) {
# r.values <- unlist(lapply(list.index, function(x)
# as.integer(flow.frame@description[[paste0("P",x,"MS")]]))
# )
# } else {
# r.values <- rep(90, length(list.index))
# }
# }
# else {
# r.values <- rep(value, length(list.index))
# }
# w.values <- (4.5-log10(262144/abs(r.values)))/2
# w.values[which(w.values<0)] <- 0.5
# w.values[which(is.infinite(w.values))] <- 0.5
# flow.frame.inv <- flow.frame
# for(t in 1:length(markers.transform)){
# invLgcl <- inverseLogicleTransform(trans = logicleTransform(w=w.values[t]))
# flow.frame.inv <- transform(flow.frame.inv, transformList(markers.transform[t],invLgcl))
# }
# return(flow.frame.inv)
# }
# arcsinhTransCIPHE <- function(flow.frame, marker=NULL, arg){
# raw <- flow.frame@exprs
# mat <- flow.frame@exprs
# if(is.null(marker) || length(marker)<1){
# marker <- colnames(flow.frame)
# }
# # print(marker)
# mat <- mat[,marker]
# colnames(mat) <- marker
# mat <- asinh(mat/arg)
# raw[,marker] <- mat[,marker]
# flow.frame@exprs <- raw
# return(flow.frame)
# }
# inversArcsinhTransCIPHE <- function(flow.frame, marker_untrans=NULL, arg){
# raw <- flow.frame@exprs
# mat <- flow.frame@exprs
# if(!is.null(marker_untrans)|| length(marker_untrans)>0){
# marker_untrans_index <- which(colnames(flow.frame)%in%marker_untrans)
# # print(marker_untrans_index)
# mat <- mat[,-marker_untrans_index]
# }
# marker <- colnames(mat)
# mat <- sinh(mat)*arg
# raw[,marker] <- mat[,marker]
# flow.frame@exprs <- raw
# return(flow.frame)
# }
# deCompensateFlowFrame <- function(x, spillover)
# {
# if(!is.null(spillover)){
# cols <- colnames(spillover)
# sel <- cols %in% colnames(x)
# if(!all(sel)) {
# stop(keyword(x)[["FILENAME"]], "\\nThe following parameters in the spillover matrix are not present in the flowFrame:\\n",
# paste(cols[!sel], collapse=", "), call.=FALSE)
# }
# e <- exprs(x)
# e[, cols] <- e[, cols] %*% spillover
# exprs(x) = e
# return(x)
# } else {
# return(x)
# }
# }
# ##1. CLUSTERING ----------------
# #marque page: à enlever?
# #Function used to organize data. Groups are processed depending whether they contain multiple flow frames or not.
# get_flow_frames_from_groups <- function(flow.frames, files.list)
# {
# v <- list()
# v <- lapply(files.list, function(x) {
# if (length(x) > 1) {
# w <- list()
# w <- lapply(x, function(y) {
# j <- flow.frames[names(flow.frames) == y]
# names(j) <- y
# return(j)
# })
# names(w) <- x
# return (w)
# }
# else {
# u <- flow.frames[names(flow.frames) == x]
# names(u) <- x
# return(u)
# }
# })
# return(v)
# }
# #Function that compensates data and/or transforms them.
# pre_process_fcs <- function(flow.frames, arg, transformation, compens, marker_untrans)
# {
# flow.frames <- lapply(flow.frames, function(flow.frame){
# if (compens == TRUE) {
# comp <- grep("SPILL", names(description(flow.frame)), value = T)
# if(length(comp)>0){
# print("Found compensation matrix, applying...")
# comp <- description(flow.frame)[comp][[1]]
# if(is.character(comp)){
# comp <- strsplit(comp, ",")[[1]]
# num.channels <- as.numeric(comp[1])
# m <- matrix(nrow = num.channels, byrow = T, data = as.numeric(comp[(num.channels + 2):length(comp)]))
# colnames(m) <- comp[2:(1 + num.channels)]
# comp <- m
# }
# flow.frame <- compensate(flow.frame, spillover = comp)
# }
# }
# #Any transformation method can be added here, but you must also modify server.R and ui.R accordingly.
# if(transformation == "Asinh"){
# marker_trans <- colnames(flow.frame)[-which(colnames(flow.frame)%in%marker_untrans)]
# flow.frame <- arcsinhTransCIPHE(flow.frame, marker_trans, arg)
# } else if(transformation == "Logicle"){
# marker_trans <- colnames(flow.frame)[-which(colnames(flow.frame)%in%marker_untrans)]
# flow.frame <- logiclTransformCIPHE(flow.frame, arg, marker_trans) # transform value
# }
# return(flow.frame)
# })
# return(flow.frames)
# }
# #Function used to transform data via Logicle transform, determining automaticly the better factor.
# run_clustering <- function(flow.frames, methods, args, nb.cluster, params,
# outputDir, index, groups.clustering=NULL, ncores=1, transComp, marker_untrans){
# if(!is.null(groups.clustering) || length(groups.clustering)<length(flow.frames)){
# flow.frames <- lapply(groups.clustering, function(x){
# ff <- flow.frames[[x[1]]]
# file.params <- rep(1, dim(ff)[1])
# if(length(x)>1){
# lapply(c(2:length(x)), function(y){
# ff@exprs <<- rbind2(ff@exprs, cbind2(flow.frames[[x[y]]]@exprs), names(groups.clustering)[[y]])
# file.params <<- as.matrix(c(file.params, rep(y, dim(flow.frames[[x[y]]])[1])))
# })
# }
# file.params <- as.matrix(file.params)
# colnames(file.params) <- "sample"
# if ("sample" %in% colnames(ff@exprs)) {
# ff@exprs[, "sample"] <- as.matrix(file.params)
# }
# else {
# # ff <- cbind2(ff, as.matrix(file.params))
# ff <- enrich.FCS.CIPHE(ff, file.params)
# }
# return(ff)
# })
# }
# marker.enrich <- paste0(methods, ".", dim(flow.frames[[1]]@exprs)[2]+1)
# cluster_flow_frame <- function (flow.frame, methods, outputDir, params, nb.cluster, name, groups.clustering = T, method.matrix = "median") {
# # source("ModifyFCS.R")
# enrich.FCS.CIPHE <- function(original, new.column)
# {
# new_p <- parameters(original)[1,]
# ## Now, let's change it's name from $P1 to $P26 (or whatever the next new number is)
# new_p_number <- as.integer(dim(original)[2]+1)
# rownames(new_p) <- c(paste0("$P", new_p_number))
# ## Now, let's combine the original parameter with the new parameter
# library('BiocGenerics') ## for the combine function
# allPars <- combine(parameters(original), new_p)
# ## Fix the name and description of the newly added parameter, say we want to be calling it cluster_id
# new_p_name <- colnames(new.column)
# allPars@data$name[new_p_number] <- new_p_name
# allPars@data$desc[new_p_number] <- new_p_name
# new_exprs <- cbind(original@exprs, new.column)
# new_kw <- original@description
# new_kw["$PAR"] <- as.character(new_p_number)
# new_kw[paste0("$P",as.character(new_p_number),"N")] <- new_p_name
# new_kw[paste0("$P",as.character(new_p_number),"S")] <- new_p_name
# new_kw[paste0("$P",as.character(new_p_number),"E")] <- "0,0"
# new_kw[paste0("$P",as.character(new_p_number),"G")] <- "1"
# new_kw[paste0("$P",as.character(new_p_number),"B")] <- new_kw["$P1B"]
# new_kw[paste0("$P",as.character(new_p_number),"R")] <- new_kw["$P1R"]
# new_kw[paste0("flowCore_$P",as.character(new_p_number),"Rmin")] <- new_kw["flowCore_$P1Rmin"]
# new_kw[paste0("flowCore_$P",as.character(new_p_number),"Rmax")] <- new_kw["flowCore_$P1Rmax"]
# ## Now, let's just combine it into a new flowFrame
# new_fcs <- new("flowFrame", exprs=new_exprs, parameters=allPars, description=new_kw)
# return(new_fcs)
# }
# library("flowCore")
# library("plyr")
# m <- flow.frame@exprs[,params]
# labels <- pData(parameters(flow.frame))[,2]
# params.names <- pData(parameters(flow.frame))[,1]
# labels[which(is.na(labels))] <- colnames(flow.frame)[c(which(is.na(labels)))]
# names(params.names) <- labels
# # clustering
# groups <- cluster::clara(m,k=nb.cluster,samples=args)$clustering
# new_col <- as.matrix(groups)
# marker <- paste0(methods, ".", dim(flow.frame@exprs)[2]+1)
# colnames(new_col) <- marker
# # over.clustering
# flow.frame.e <- flow.frame
# flow.frame.e <- enrich.FCS.CIPHE(flow.frame, new_col)
# # flow.frame.e <- cbind2(flow.frame, new_col)
# tab <- as.data.frame(flow.frame.e@exprs)
# names(tab) <- c(names(params.names), marker)
# if(method.matrix == "mean"){
# tab.me <- ddply(.data = tab, .variables = marker, .fun = colwise(mean, is.numeric))
# } else if(method.matrix == "median"){
# tab.me <- ddply(.data = tab, .variables = marker, .fun = colwise(median, is.numeric))
# }
# pop.size <- ddply(.data = tab, .variables = marker, .fun = nrow)
# names(pop.size) <- gsub("V1", "popsize", names(pop.size))
# res <- merge(tab.me, pop.size, by = marker)
# if (!is.null(groups.clustering)) {
# if(method.matrix == "mean"){
# tab.me.by.sample <- ddply(.data = tab, .variables = c(marker, "sample"), .fun = colwise(mean, is.numeric))
# } else if(method.matrix == "median"){
# tab.me.by.sample <- ddply(.data = tab, .variables = c(marker, "sample"), .fun = colwise(median, is.numeric))
# }
# pop.size.by.sample <- ddply(.data = tab, .variables = as.formula(paste0("~", marker, " * sample")), .fun = nrow)
# names(pop.size.by.sample) <- gsub("V1", "popsize", names(pop.size.by.sample))
# tab.me.by.sample <- merge(tab.me.by.sample, pop.size.by.sample, by = c(marker, "sample"), all.x = T)
# #Rotate the by.sample table
# temp <- reshape::melt(tab.me.by.sample, id = c(marker, "sample"))
# temp$variable <- paste(temp$variable, temp$sample, sep = "@")
# temp$sample <- NULL
# formula <- paste(marker, "variable", sep = " ~ ")
# temp <- reshape::cast(temp, as.formula(formula))
# res <- merge(res, temp, by = marker, all.x = T)
# }
# return(list(flow.frame.e,res))
# }
# cl <- makeCluster(ncores)
# registerDoParallel(cl)
# flow.frames.e <- foreach (i = c(1:length(flow.frames))) %dopar% cluster_flow_frame(flow.frames[[i]], methods, outputDir, params, nb.cluster, names(flow.frames)[[i]], groups.clustering = groups.clustering)
# # flow.frames.e <- lapply(c(1:length(flow.frames)), function(i){
# # cluster_flow_frame(flow.frames[[i]], methods, outputDir, params, nb.cluster, names(flow.frames)[[i]], groups.clustering = groups.clustering)
# # })
# stopCluster(cl)
# f1 <- lapply(flow.frames.e, function(a){return(a[[1]])})
# f2 <- lapply(flow.frames.e, function(b){return(b[[2]])})
# names(f1) <- names(flow.frames)
# names(f2) <- names(flow.frames)
# lapply(c(1:length(f2)), function(x) {
# lapply(c(1:length(groups.clustering[[x]])), function(y) {
# colnames(f2[[x]]) <<- gsub(paste0("@", y), paste0("@", groups.clustering[[x]][[y]]), colnames(f2[[x]]))
# })
# })
# lapply(c(1:length(f1)), function(x){
# outname <- paste0(outputDir,"/enriched_", names(f1)[x])
# ff <- f1[[x]]
# if (!is.na(transComp[4])) {
# if (transComp[2] == "Logicle") {
# print("Logicle detransformation...")
# ff <- inversLogiclTransformCIPHE(ff, markers=marker_untrans)
# } else if (transComp[2] == "Asinh") {
# ff <- inversArcsinhTransCIPHE(ff, c(marker.enrich,marker_untrans,"sample"), as.integer(transComp[3]))
# }
# if(transComp[1] == T)
# print("Decompensation.")
# ff <- deCompensateFlowFrame(ff, ff@description[["SPILL"]])
# }
# print("Writing FCS...")
# # write.FCS(ff, outname, delimiter="#")
# write.FCS(ff, outname)
# })
# # print(f2[[1]][,"popsize"])
# return(list(f1,f2))
# }
# get_matrix_from_fcs <- function(i, flow.frames, method.matrix="median", marker, groups.clustering=NULL
# #need.transform = FALSE, arg = NULL, transformation = NULL, compens = NULL)
# ) {
# flow.frame <- flow.frames[[i]]
# # if(need.transform == TRUE){
# # flow.frame <- pre_process_fcs(flow.frames, arg, transformation, compens))
# # }
# tab <- as.data.frame(flow.frame@exprs)
# labels <- pData(parameters(flow.frame))[,2]
# params.names <- pData(parameters(flow.frame))[,1]
# labels[which(is.na(labels))] <- colnames(flow.frame)[c(which(is.na(labels)))]
# labels[which(labels=="<NA>")] <- colnames(flow.frame)[c(which(labels=="<NA>"))]
# names(params.names) <- labels
# names(tab) <- names(params.names)
# if(method.matrix == "mean"){
# tab.me <- ddply(.data = tab, .variables = marker, .fun = colwise(mean, is.numeric))
# } else if(method.matrix == "median"){
# tab.me <- ddply(.data = tab, .variables = marker, .fun = colwise(median, is.numeric))
# }
# pop.size <- ddply(.data = tab, .variables = marker, .fun = nrow)
# names(pop.size) <- gsub("V1", "popsize", names(pop.size))
# res <- merge(tab.me, pop.size, by = marker)
# if (!is.null(groups.clustering)) {
# if(method.matrix == "mean"){
# tab.me.by.sample <- ddply(.data = tab, .variables = c(marker, "sample"), .fun = colwise(mean, is.numeric))
# } else if(method.matrix == "median"){
# tab.me.by.sample <- ddply(.data = tab, .variables = c(marker, "sample"), .fun = colwise(median, is.numeric))
# }
# pop.size.by.sample <- ddply(.data = tab, .variables = as.formula(paste0("~", marker, " * sample")), .fun = nrow)
# names(pop.size.by.sample) <- gsub("V1", "popsize", names(pop.size.by.sample))
# tab.me.by.sample <- merge(tab.me.by.sample, pop.size.by.sample, by = c(marker, "sample"), all.x = T)
# #Rotate the by.sample table
# temp <- reshape::melt(tab.me.by.sample, id = c(marker, "sample"))
# temp$variable <- paste(temp$variable, temp$sample, sep = "@")
# temp$sample <- NULL
# formula <- paste(marker, "variable", sep = " ~ ")
# temp <- reshape::cast(temp, as.formula(formula))
# res <- merge(res, temp, by = marker, all.x = T)
# }
# lapply(c(1:length(groups.clustering[[i]])), function(y) {
# colnames(res) <<- gsub(paste0("@", y), paste0("@", groups.clustering[[i]][[y]]), colnames(res))
# })
# return(as.matrix(res))
# }
# ##2. ANALYSIS ----------------
# #Runs the gated analysis, mainly like the original code but with a few tricks due to how data are processed in this version.
# run_analysis_gated <- function(flow.frames, clusteredFiles, map.clustedFiles.names, outputDir,
# groups.clustering, col.names.gated, col.names.matrix, inter.cluster.connections, col.names.inter_cluster,
# inter_cluster.weight_factor, overlap_method, ew_influence)
# {
# print(inter_cluster.weight_factor)
# gated.flow.frames <- lapply(c(1:length(flow.frames)), function(i) {flow.frames[[i]]@exprs})
# print(sprintf("Markers used for SCAFFoLD: %s", paste(col.names.gated, collapse = ", ")))
# print(paste("Using as reference ", map.clustedFiles.names, sep = " "))
# gated_data <- load_attractors_from_gated_data(flow.frames, col.names.gated)
# tab.attractors <- gated_data$tab.attractors
# att.labels <- gated_data$cellType_key$population
# G.attractors <- NULL
# ret <- process_files(clusteredFiles,map.clustedFiles.names, G.attractors, tab.attractors, att.labels,
# col.names.gated = col.names.gated, col.names.matrix = col.names.matrix, scaffold.mode = "gated",
# inter.cluster.connections = inter.cluster.connections, col.names.inter_cluster = col.names.inter_cluster
# )
# ret <- c(list(scaffold.col.names = col.names.gated, landmarks.data = gated_data$downsampled.data), ret)
# print(paste("Map file created:", sprintf("%s.scaffold", map.clustedFiles.names, sep = "/")))
# my_save(ret, paste(outputDir, sprintf("%s.scaffold", map.clustedFiles.names), sep = "/"))
# return(ret)
# }
# load_attractors_from_gated_data <- function(flow.frames, col.names, ...)
# {
# res <- NULL
# for(i in c(1:length(flow.frames))) {
# population <- tail(strsplit(names(flow.frames)[i], "_")[[1]], n = 1)
# fcs <- flow.frames[[i]]
# tab <- exprs(fcs)
# m <- as.matrix(tab)
# col.names <- colnames(m)
# tab <- data.frame(m)
# names(tab) <- col.names
# if(!all(pData(parameters(fcs))$desc == " ")) colnames(tab) <- pData(parameters(fcs))$desc
# else colnames(tab) <- pData(parameters(fcs))$name
# if(any(is.na(colnames(tab)))) {
# w <- is.na(colnames(tab))
# colnames(tab)[w] <- pData(parameters(fcs))$name[w]
# }
# tab <- as.matrix(tab)
# tab[tab < 0] <- 0
# tab <- as.data.frame(tab)
# tab <- cbind(tab, population, stringsAsFactors = F)
# res <- rbind(res, tab)
# }
# downsampled.data <- downsample_by(res, "population", 1000)
# names(downsampled.data) <- gsub("population", "cellType", names(downsampled.data))
# #Change cellType to be numbers
# k <- unique(res$population)
# k <- data.frame(population = k, cellType = seq_along(k), stringsAsFactors = F)
# res <- merge(res, k)
# res <- res[, grep("population", names(res), invert = T)]
# res <- ddply(res, ~cellType, colwise(median))
# return(list(downsampled.data = downsampled.data, tab.attractors = res, cellType_key = k))
# }
# downsample_by <- function(tab, col.name, size)
# {
# print(sprintf("Downsampling to %d events", size))
# return(ddply(tab, col.name, function(x, size) {
# if(nrow(x) <= size){
# return(x)
# } else {
# return(x[sample(1:nrow(x), size),])
# }
# }, size = size))
# }
# #Function used to process files into objects containing graphs and data.
# process_files <- function(clusteredFiles, map.clustedFiles.names=NULL, G.attractors, tab.attractors, att.labels, col.names.gated,
# col.names.matrix, scaffold.mode, ref.scaffold.markers = NULL,
# names.mapping = NULL, ew_influence = NULL,col.names.inter_cluster = NULL, ...)
# {
# ret <- list(graphs = list(), clustered.data = list())
# map_names <- names_map_factory(names.mapping)
# if(is.null(map.clustedFiles.names)){
# map.clustedFiles.names <- names(clusteredFiles)[1]
# }
# print(paste("Processing ",map.clustedFiles.names, sep = " "))
# tab <- clusteredFiles[[map.clustedFiles.names]]
# names(tab) <- map_names(names(tab))
# col.names.inter_cluster <- map_names(col.names.inter_cluster)
# if(is.null(ew_influence)) ew_influence <- ceiling(length(col.names.gated) / 3)
# tab <- tab[!apply(tab[, col.names.matrix], 1, function(x) {all(x == 0)}),]
# names(tab) <- gsub("cellType", "groups", names(tab))
# names(tab) <- gsub("^X", "", names(tab))
# print(sprintf("Running with Edge weight: %f", ew_influence))
# print(col.names.gated)
# print(col.names.matrix)
# res <- process_data(tab, map.clustedFiles.names, G.attractors, tab.attractors,
# col.names.gated = col.names.gated, col.names.matrix = col.names.matrix,
# att.labels = att.labels, already.clustered = T, ew_influence = ew_influence,
# col.names.inter_cluster = col.names.inter_cluster, ...)
# G.complete <- get_highest_scoring_edges(res$G.complete)
# ret$graphs[map.clustedFiles.names] <- list(G.complete)
# G.attractors <- res$G.attractors
# for(i in c(1:length(clusteredFiles))) {
# if(names(clusteredFiles)[i]==map.clustedFiles.names){
# NULL
# } else {
# print(paste("Processing", names(clusteredFiles)[[i]], sep = " "))
# tab <- clusteredFiles[[i]]
# names(tab) <- map_names(names(tab))
# col.names.inter_cluster <- map_names(col.names.inter_cluster)
# if(scaffold.mode == "existing") {
# #Some markers in the reference scaffold file have been designated
# #for mapping, but they are missing from the sample files
# if(any(is.na(names(names.mapping))))
# tab <- add_missing_columns(tab, col.names, fill.data = 0)
# if(is.null(ew_influence))
# ew_influence <- ceiling(sum(!is.na(names(names.mapping))) / 3)
# } else {
# if(is.null(ew_influence)) ew_influence <- ceiling(length(col.names.gated) / 3)
# }
# tab <- tab[!apply(tab[, col.names.matrix], 1, function(x) {all(x == 0)}),]
# names(tab) <- gsub("cellType", "groups", names(tab))
# names(tab) <- gsub("^X", "", names(tab))
# print(sprintf("Running with Edge weight: %f", ew_influence))
# res <- process_data(tab, map.clustedFiles.names, G.attractors, tab.attractors,
# col.names.gated = col.names.gated, col.names.matrix = col.names.matrix,
# att.labels = att.labels, already.clustered = T, ew_influence = ew_influence,
# col.names.inter_cluster = col.names.inter_cluster, ...)
# G.complete <- get_highest_scoring_edges(res$G.complete)
# ret$graphs[names(clusteredFiles)[[i]]] <- list(G.complete)
# G.attractors <- res$G.attractors
# }
# }
# suppressWarnings( ret <- c(ret, list(dataset.statistics = get_dataset_statistics(ret))) )
# # ret <- c(ret, list(dataset.statistics = get_dataset_statistics(ret)))
# return(ret)
# }
# get_highest_scoring_edges <- function(G)
# {
# #Remove inter-cluster edges for this calculation
# e <- get.edges(G, E(G))
# E(G)$edge_type <- "cluster_to_landmark"
# e <- cbind(V(G)$type[e[,1]], V(G)$type[e[,2]])
# to.remove <- (e[,1] == 2) & (e[,2] == 2)
# E(G)$edge_type[(e[,1] == 2) & (e[,2] == 2)] <- "inter_cluster"
# g.temp <- delete.edges(G, E(G)[to.remove])
# V(g.temp)$highest_scoring_edge <- 0
# for(i in 1:vcount(g.temp))
# {
# if(V(g.temp)$type[i] == 2)
# {
# sel.edges <- incident(g.temp, i)
# max.edge <- sel.edges[which.max(E(G)[sel.edges]$weight)]
# V(g.temp)$highest_scoring_edge[i] <- max.edge
# E(G)$edge_type[max.edge] <- "highest_scoring"
# }
# }
# V(G)$highest_scoring_edge <- V(g.temp)$highest_scoring_edge
# return(G)
# }
# names_map_factory <- function(names.map)
# {
# function(v)
# {
# sel <- v %in% names(names.map)
# if(any(sel))
# v[sel] <- names.map[v[sel]]
# return(v)
# }
# }
# process_data <- function(tab, map.clustedFiles.names=NULL, G.attractors = NULL, tab.attractors = NULL, col.names.gated = NULL, col.names.matrix = NULL, att.labels = NULL, dist.thresh = 0.7,
# already.clustered = FALSE, inter.cluster.connections = FALSE, col.names.inter_cluster = NULL, inter_cluster.weight_factor = 0.7, ew_influence,
# overlap_method = NULL){
# if(!already.clustered) {
# tab <- cluster_data(tab, col.names.matrix)
# tab.clustered <- ddply(tab, ~groups, colwise(median))
# }
# else
# tab.clustered <- tab
# if(is.null(col.names.inter_cluster) || col.names.inter_cluster == "")
# col.names.inter_cluster = col.names.matrix
# if(is.null(G.attractors)) {
# G.attractors <- build_graph(tab.attractors, col.names.gated) #marque page
# G.complete <- add_vertices_to_attractors_graph(G.attractors, tab.clustered, tab.attractors, col.names.att = col.names.gated, col.names.matrix = col.names.matrix, dist.thresh)
# G.complete <- complete.forceatlas2(G.complete, first.iter = 50000,
# overlap.iter = 20000, ew_influence = ew_influence, overlap_method = "repel")
# if(inter.cluster.connections)
# {
# print("Adding inter-cluster connections with markers:")
# print(col.names.inter_cluster)
# print(sprintf("Weight factor:%f", inter_cluster.weight_factor))
# G.complete <- add_inter_clusters_connections(G.complete, col.names.inter_cluster, weight.factor = inter_cluster.weight_factor)
# G.complete <- complete.forceatlas2(G.complete, first.iter = 50000, overlap.iter = 20000,
# ew_influence = ew_influence, overlap_method = overlap_method)
# }
# V(G.attractors)$x <- V(G.complete)$x[1:vcount(G.attractors)]
# V(G.attractors)$y <- V(G.complete)$y[1:vcount(G.attractors)]
# } else {
# print(col.names.gated)
# print(col.names.matrix)
# G.complete <- add_vertices_to_attractors_graph(G.attractors, tab.clustered, tab.attractors, col.names.att = col.names.gated, col.names.matrix = col.names.matrix, dist.thresh)
# fixed <- rep(FALSE, vcount(G.complete))
# fixed[1:vcount(G.attractors)] <- TRUE
# G.complete <- complete.forceatlas2(G.complete, first.iter = 50000, overlap.iter = 20000,
# overlap_method = "repel", ew_influence = ew_influence, fixed = fixed)
# if(inter.cluster.connections) {
# print("Adding inter-cluster connections with markers:")
# print(col.names.inter_cluster)
# print(sprintf("Weight factor:%f", inter_cluster.weight_factor))
# G.complete <- add_inter_clusters_connections(G.complete, col.names.inter_cluster, weight.factor = inter_cluster.weight_factor)
# G.complete <- complete.forceatlas2(G.complete, first.iter = 50000, overlap.iter = 20000,
# overlap_method = overlap_method, ew_influence = ew_influence, fixed = fixed)
# }
# }
# G.complete <- add_attractors_labels(G.complete, att.labels)
# V(G.complete)$name <- gsub(".fcs", "", V(G.complete)$name)
# return(list(G.attractors = G.attractors, G.complete = G.complete, tab.attractors = tab.attractors, tab = tab, col.names.gated = col.names.gated, col.names.matrix = col.names.matrix))
# }
# add_vertices_to_attractors_graph <- function(G, tab.clustered, tab.median, col.names.att, col.names.matrix, dist.thresh = 0.7)
# {
# dd <- get_distances_from_attractors(tab.clustered, tab.median, col.names.att, col.names.matrix, dist.thresh)
# n <- nrow(dd)
# num.vertices <- length(V(G))
# G <- add.vertices(G, n)
# v.seq <- (num.vertices + 1):length(V(G))
# V(G)[v.seq]$name <- as.character(v.seq)
# V(G)[v.seq]$Label <- paste("c", tab.clustered[,1], sep = "")
# row.names(dd) <- as.character(v.seq)
# for(i in 1:nrow(dd))
# {
# v <- dd[i,]
# v <- v[v > 0]
# if(length(v) > 0)
# {
# e.list <- c(rbind(as.character(num.vertices + i), names(v)))
# G <- G + edges(e.list, weight = v)
# }
# }
# # weight <- E(G)$weight
# # E(G)$weight <- weight ^ 10
# maxx <- maxy <- rep(Inf, vcount(G))
# minx <- miny <- rep(-Inf, vcount(G))
# maxx[1:num.vertices] <- minx[1:num.vertices] <- V(G)$x[1:num.vertices]
# maxy[1:num.vertices] <- miny[1:num.vertices] <- V(G)$y[1:num.vertices]
# lay <- layout.kamada.kawai(G, minx = minx, maxx = maxx, miny = miny, maxy = maxy)
# colnames(lay) <- c("x", "y")
# G <- set.vertex.attribute(G, name = "x", value = lay[, "x"])
# G <- set.vertex.attribute(G, name = "y", value = lay[, "y"])
# V(G)[1:num.vertices]$type <- 1 #attractor
# V(G)[(num.vertices + 1):vcount(G)]$type <- 2 #cell
# for(i in colnames(tab.clustered)){
# G <- set.vertex.attribute(G, name = i, index = (num.vertices + 1):vcount(G), value = tab.clustered[, i])
# }
# G <- set_visual_attributes(G)
# return(G)
# }
# set_visual_attributes <- function(G)
# {
# att <- V(G)$type == 1
# V(G)$r <- 79
# V(G)$g <- 147
# V(G)$b <- 222
# V(G)$size <- 10
# V(G)[att]$r <- 255
# V(G)[att]$g <- 117
# V(G)[att]$b <- 128
# V(G)[att]$size <- 20
# E(G)$r <- 180
# E(G)$g <- 180
# E(G)$b <- 180
# return(G)
# }
# build_graph <- function(tab, col.names, filtering_T = 0.7, method = "euclidean")
# {
# m <- as.matrix(tab[, col.names])
# row.names(m) <- tab$cellTyp
# #Any distance metric can be added here
# if (method == "cosine") {
# dd <- cosine_similarity_matrix(m)
# diag(dd) <- 0
# dd[is.na(dd)] <- 0 #This can happen if one of the attractors has all 0's for the markers of interest
# if(filtering_T >= 1) {
# dd <- filter_similarity_matrix_by_rank(dd, filtering_T)
# }
# else {
# dd <- filter_similarity_matrix(dd, filtering_T)
# }
# }
# else if (method == "euclidean") {
# dd <- euclid_similarity_matrix(m)
# diag(dd) <- 0
# dd[is.na(dd)] <- 0
# }
# G <- graph.adjacency(dd, mode = "undirected", weighted = T)
# n.vertices <- length(V(G))
# lay <- layout.kamada.kawai(G)
# colnames(lay) <- c("x", "y")
# G <- set.vertex.attribute(G, name = "x", value = lay[, "x"])
# G <- set.vertex.attribute(G, name = "y", value = lay[, "y"])
# for(i in names(tab))
# G <- set.vertex.attribute(G, name = i, value = tab[, i])
# return(G)
# }
# cosine_similarity_matrix <- function(m)
# {
# ret <- t(apply(m, 1, function(x, m) {cosine_similarity_from_matrix(x, m)}, m = m))
# return(ret)
# }
# cosine_similarity_from_matrix <- function(v, m)
# {
# m <- as.matrix(m[, c(1:length(v)), drop = F])
# ret <- apply(m, 1, function(x, v) {
# return(crossprod(x, v)/sqrt(crossprod(x) * crossprod(v)))}, v)
# return(ret)
# }
# # Implementation of euclid similarity #marque page
# euclid_similarity_matrix <- function(m)
# {
# euclid <- as.matrix(dist(m, method="euclidean"))
# # ret <- 1 - (euclid/max(euclid)) #simple similarity
# ret <- exp(-(euclid/(0.4*4.5))^2) #more complex similarity
# return(ret)
# }
# filter_similarity_matrix <- function(m, T)
# {
# ret <- t(apply(m, 1, function(x)
# {
# if(max(x) <= T)
# x[x < max(x)] <- 0
# else
# x[x < T] <- 0
# return(x)
# }))
# return(ret)
# }
# filter_similarity_matrix_by_rank <- function(m, T)
# {
# ret <- t(apply(m, 1, function(x)
# {
# r <- rank(x, ties.method = "first")
# r <- max(r) - r + 1
# x[r > T] <- 0
# return(x)
# }))
# return(ret)
# }
# get_distances_from_attractors <- function(m, tab, col.names.att, col.names.matrix, dist.thresh)
# {
# att <- as.matrix(tab[, col.names.att])
# row.names(att) <- as.character(1:nrow(tab))
# m <- as.matrix(m[, col.names.matrix])
# dd <- t(apply(m, 1, function(x, att) {cosine_similarity_from_matrix(x, att)}, att))
# dd <- distance_from_attractor_hard_filter(dd, tab, col.names.att, thresh = 1) ##marque page
# dist.thresh <- quantile(dd, probs = 0.85, na.rm = T)
# dist.thresh <- max(c(dist.thresh, 0.5))
# dd[is.na(dd)] <- 0 #This can happen if one of the attractors has all 0's for the markers of interest
# dd <- filter_similarity_matrix(dd, dist.thresh)
# return(dd)
# }
# distance_from_attractor_hard_filter <- function(dd, tab, col.names.att, thresh = 0.5)
# {
# tab <- tab[, col.names.att]
# w <- apply(tab[,col.names.att], 1, function(x, thresh) {all(x < thresh)}, thresh = thresh)
# if(any(w))
# print("Hard removing some connections to unstained landmarks")
# dd[,w] <- 0
# return(dd)
# }
# complete.forceatlas2 <- function(G, first.iter = 1000, overlap.iter, overlap_method = NULL, ...)
# {
# print("First iteration")
# ret <- layout.forceatlas2(G, prevent.overlap = FALSE, iter = first.iter, ...)
# lay <- ret$lay
# #plot(ret$avg_displ, type = "l")
# #lines(ret$max_displ, col = "red")
# G <- set.vertex.attribute(G, name = "x", value = lay[, 1])
# G <- set.vertex.attribute(G, name = "y", value = lay[, 2])
# if(!is.null(overlap_method))
# {
# if(overlap_method == "repel")
# {
# print("Second iteration with prevent overalp")
# ret <- layout.forceatlas2(G, prevent.overlap = TRUE, iter = overlap.iter, ...)
# lay <- ret$lay
# if(any(is.na(lay)))
# {
# print("Prevent overlap iteration failed")
# }
# #plot(ret$avg_displ, type = "l")
# #lines(ret$max_displ, col = "red")
# else
# {
# G <- set.vertex.attribute(G, name = "x", value = lay[, 1])
# G <- set.vertex.attribute(G, name = "y", value = lay[, 2])
# }
# }
# else if(overlap_method == "expand")
# G <- adaptive_expand(G, overlap.iter)
# }
# return(G)
# }
# # Generated by using Rcpp::compileAttributes() -> do not edit by hand
# # Generator token: 10BE3573-1514-4C36-9D1C-5A225CD40393
# layout_forceatlas2Cpp <- function(lay, F_att_orig, mass, nodes_size, edge_list, avg_displ, kgrav, iter, prevent_overlap, fixed, max_displ, stopping_tolerance, barnes_hut) {
# layout_forceatlas2Cpp(lay, F_att_orig, mass, nodes_size, edge_list, avg_displ, kgrav, iter, prevent_overlap, fixed, max_displ, stopping_tolerance, barnes_hut)
# }
# layout.forceatlas2 <- function(G, ew_influence = 1, kgrav = 1, iter = 1000, prevent.overlap = FALSE, fixed = rep(FALSE, vcount(G)), stopping_tolerance = 0.001, barnes_hut = FALSE)
# {
# if(vcount(G) >= 2000)
# barnes_hut <- TRUE
# if(vcount(G) > 2000)
# stopping_tolerance <- 0.01
# else if(vcount(G) > 800)
# stopping_tolerance <- 0.005
# else
# stopping_tolerance <- 0.001
# if(is.null(get.vertex.attribute(G, "x")))
# {
# lay <- cbind(x = rnorm(vcount(G)), y = rnorm(vcount(G)))
# }
# else
# {
# lay <- cbind(x = V(G)$x, y = V(G)$y)
# }
# #This is only used with prevent.overlap
# if(is.null(get.vertex.attribute(G, "size")))
# V(G)$size <- rep(10, vcount(G))
# mass <- 1 + degree(G)
# F_att <- (E(G)$weight ^ ew_influence)
# edge_list <- get.edgelist(G, names = F) - 1 #This is gonna be used in the C code where the indexing is 0-based
# avg_displ <- numeric(iter)
# max_displ <- numeric(iter)
# print(system.time(layout_forceatlas2Cpp(lay, F_att, mass, V(G)$size, edge_list, avg_displ,
# kgrav, iter, prevent.overlap, fixed, max_displ, stopping_tolerance, barnes_hut)))
# return(list(lay = lay, avg_displ = avg_displ, max_displ = max_displ))
# }
# add_inter_clusters_connections <- function(G, col.names, weight.factor, method = "cosine")
# {
# tab <- get_vertex_table(G)
# tab <- tab[tab$type == 2,]
# m <- as.matrix(tab[, col.names])
# row.names(m) <- tab$name
# #Any distance metric can be added here
# if (method == "cosine") {
# dd <- cosine_similarity_matrix(m)
# diag(dd) <- 0
# dd[is.na(dd)] <- 0 #This can happen if one of the attractors has all 0's for the markers of interest
# dist.thresh <- quantile(dd, probs = 0.85, na.rm = T)
# dist.thresh <- max(c(dist.thresh, 0.5))
# dd <- filter_similarity_matrix(dd, dist.thresh)
# dd <- filter_similarity_matrix_by_rank(dd, 3)
# }
# else if (method == "euclidean") {
# dd <- euclid_similarity_matrix(m)
# diag(dd) <- 0
# dd[is.na(dd)] <- 0
# dist.thresh <- quantile(dd, probs = 0.7, na.rm = T)
# dist.thresh <- max(c(dist.thresh, 0.5))
# dd <- filter_similarity_matrix(dd, dist.thresh)
# dd <- filter_similarity_matrix_by_rank(dd, 3)
# }
# e.list <- NULL
# for(i in 1:nrow(dd))
# {
# v <- dd[i,]
# v <- v[v > 0]
# if(length(v) > 0)
# {
# e.list <- rbind(e.list, data.frame(a = tab[i, "name"], b = names(v), weight = v, stringsAsFactors = FALSE))
# }
# }
# temp <- as.matrix(e.list[, c("a", "b")])
# temp <- t(apply(temp, 1, sort))
# e.list <- data.frame(temp, weight = e.list$weight, stringsAsFactors = FALSE)
# names(e.list)[1:2] <- c("a", "b")
# e.list <- e.list[!duplicated(e.list[, c("a", "b")]),]
# e.list.igraph <- c(t(as.matrix(e.list[, c("a", "b")])))
# #G <- G + edges(e.list, weight = (v ^ 30))
# G <- G + edges(e.list.igraph, weight = e.list$weight * weight.factor)
# return(G)
# }
# adaptive_expand <- function(G, max.iter)
# {
# print("Starting adaptive expansion")
# x <- V(G)$x
# y <- V(G)$y
# m <- cbind(x, y)
# ss <- outer(V(G)$size, V(G)$size, "+")
# for(i in 1:max.iter)
# {
# dd <- as.matrix(dist(m), method = "euclidean")
# dd <- dd - ss
# dd <- dd[upper.tri(dd)]
# if(all(dd >= 0))
# break
# else
# m <- m * 1.2
# }
# print(sprintf("Expansion stopped at iteration: %d", i))
# V(G)$x <- m[, "x"]
# V(G)$y <- m[, "y"]
# return(G)
# }
# get_vertex_table <- function(G)
# {
# att <- list.vertex.attributes(G)
# ret <- NULL
# for(a in att)
# {
# d <- data.frame(get.vertex.attribute(G, a), stringsAsFactors = FALSE)
# if(is.null(ret))
# ret <- d
# else
# ret <- cbind(ret, d, stringsAsFactors = FALSE)
# }
# names(ret) <- att
# return(ret)
# }
# add_attractors_labels <- function(G, v)
# {
# V(G)$name[1:length(v)] <- V(G)$Label[1:length(v)] <- v
# return(G)
# }
# get_dataset_statistics <- function(dataset)
# {
# graphs <- dataset$graphs
# ret <- NULL
# for(G in graphs)
# {
# V(G)$popsize.relative <- V(G)$popsize / sum(V(G)$popsize, na.rm = T)
# tab <- get.data.frame(G, what = "vertices")
# max.vals <- sapply(tab, function(x) {if(is.numeric(x)) return(max(x, na.rm =T))})
# max.vals <- max.vals[!sapply(max.vals, is.null)]
# for(i in 1:length(max.vals))
# {
# var.name <- names(max.vals)[i]
# if(!(var.name %in% names(ret)) || max.vals[[i]] > ret[[var.name]])
# ret[var.name] <- max.vals[i]
# }
# }
# return(list(max.marker.vals = ret))
# }
# ## 3. Map ---------------------------------------------------------------------
# #################################################################################
# reactiveNetwork <- function (outputId)
# {
# HTML(paste("<div id=\"", outputId, "\" class=\"shiny-network-output\"><svg /></div>", sep=""))
# }
# get_numeric_vertex_attributes <- function(sc.data, sel.graph)
# {
# G <- sc.data$graphs[[sel.graph]]
# d <- get.data.frame(G, what = "vertices")
# #Don't consider attributes which are only present in the landmarks
# d <- d[d$type == 2,]
# num <- sapply(d, function(x) {is.numeric(x) && !any(is.na(x))})
# v <- list.vertex.attributes(G)[num]
# v <- v[grep("@", v, invert = T)]
# exclude <- c("x", "y", "cellType", "type", "groups", "r", "g", "b", "size", "DNA1", "DNA2", "BC1", "BC2", "BC3", "BC4", "BC5", "BC6", "Time", "Cell_length", "Cisplatin", "beadDist", "highest_scoring_edge")
# return(v[!(v %in% exclude)])
# }
# combine_marker_sample_name <- function(sel.marker, active.sample)
# {
# if(active.sample == "All" || active.sample == "Absolute" || sel.marker == "Default")
# return(sel.marker)
# else
# return(paste(sel.marker, active.sample, sep = "@"))
# }
# get_sample_names <- function(sc.data, sel.graph)
# {
# G <- sc.data$graphs[[sel.graph]]
# s <- list.vertex.attributes(G)
# if (sel.graph != names(sc.data$graphs)[1]) {
# ids <- which(list.vertex.attributes(sc.data$graphs[[1]])%in%s)
# s <- lapply(1:length(s), function(i) {
# if (i%in%ids) {
# return(NULL)
# } else {return(s[[i]])}
# })
# }
# s <- grep("@", s, value = T)
# ret <- sapply(strsplit(s, "@"), function (x) {x[[2]]})
# if (length(ret) == 0) {
# ret <- list(1)
# }
# return(unique(ret))
# }
# get_graph <- function(sc.data, sel.graph, node.size.attr, min.node.size, max.node.size, landmark.node.size)
# {
# G <- sc.data$graphs[[sel.graph]]
# edges <- data.frame(get.edgelist(G, names = F) - 1)
# colnames(edges) <- c("source", "target")
# svg.width <- 1200
# svg.height <- 800
# svg.center <- c(svg.width/2, svg.height/2)
# x <- V(G)$x
# y <- V(G)$y
# y <- -1 * y
# x <- x + abs(min(x))
# y <- y + abs(min(y))
# num.landmarks <- sum(V(G)$type == 1)
# trans <- get_graph_centering_transform(x[V(G)$type == 1], y[V(G)$type == 1], svg.width, svg.height)
# x <- (x / trans$scaling) - trans$offset.x
# y <- (y / trans$scaling) - trans$offset.y
# vertex.size <- get_vertex_size(sc.data, sel.graph, svg.width, node.size.attr, min.node.size, max.node.size, landmark.node.size)
# edges <- cbind(edges, x1 = x[edges[, "source"] + 1], x2 = x[edges[, "target"] + 1])
# edges <- cbind(edges, y1 = y[edges[, "source"] + 1], y2 = y[edges[, "target"] + 1])
# edges <- cbind(edges, id = 1:nrow(edges))
# edges <- cbind(edges, is_highest_scoring = 0)
# edges <- cbind(edges, edge_type = "")
# #Set as true for the highest scoring edges of type 2 vertices
# edges[, "is_highest_scoring"][V(G)$highest_scoring_edge[V(G)$type == 2]] <- 1
# if("edge_type" %in% list.edge.attributes(G)) #Old graphs did not have this
# edges[, "edge_type"] <- E(G)$edge_type
# ret <- list(names = V(G)$Label, size = vertex.size / trans$scaling, type = V(G)$type, highest_scoring_edge = V(G)$highest_scoring_edge, X = x, Y = y)
# ret <- c(ret, edges = list(edges))
# return(ret)
# }
# get_color_for_marker <- function(sc.data, sel.marker, rel.to.sample, sel.graph, active.sample, color.scaling,
# stats.type, colors.to.interpolate, color.under, color.over, color.scale.limits = NULL,
# color.scale.mid = NULL) {
# G <- sc.data$graphs[[sel.graph]]
# if(sel.marker == "Default") {
# ret <- rep("#4F93DE", vcount(G))
# ret[V(G)$type == 1] <- "#FF7580"
# return(list(color.vector = ret, color.scale.lim = NULL))
# } else {
# v <- get.vertex.attribute(G, combine_marker_sample_name(sel.marker, active.sample))
# f <- colorRamp(colors.to.interpolate, interpolate = "linear")
# if(rel.to.sample != "Absolute") {
# rel.to.marker <- combine_marker_sample_name(sel.marker, rel.to.sample)
# if(stats.type == "Difference")
# v <- v - (get.vertex.attribute(G, rel.to.marker))
# else if(stats.type == "Ratio")
# v <- v / (get.vertex.attribute(G, rel.to.marker))
# v[is.infinite(v)] <- NA
# }
# color.scale.lim <- NULL
# if(color.scaling == "local") color.scale.lim <- list(min = min(v, na.rm = T), max = max(v, na.rm = T))
# if(!is.null(color.scale.limits)) {
# under <- v < color.scale.limits[1]
# over <- v > color.scale.limits[2]
# v[under] <- color.scale.limits[1]
# v[over] <- color.scale.limits[2]
# if(is.null(color.scale.mid))
# v <- scales::rescale(v)
# else
# v <- scales::rescale_mid(v, mid = color.scale.mid)
# v <- f(v)
# v <- apply(v, 1, function(x) {sprintf("rgb(%s)", paste(round(x), collapse = ","))})
# v[under] <- sprintf("rgb(%s)", paste(col2rgb(color.under), collapse = ","))
# v[over] <- sprintf("rgb(%s)", paste(col2rgb(color.over), collapse = ","))
# } else {
# v <- f(scales::rescale(v)) #colorRamp needs an argument in the range [0, 1]
# v <- apply(v, 1, function(x) {sprintf("rgb(%s)", paste(round(x), collapse = ","))})
# }
# return(list(color.vector = v, color.scale.lim = color.scale.lim))
# }
# }
# get_number_of_cells_per_landmark <- function(sc.data, sel.graph)
# {
# G <- sc.data$graphs[[sel.graph]]
# land <- V(G)[V(G)$type == 1]$Label
# ee <- get.edgelist(G)
# ee <- ee[V(G)[V(G)$type == 2]$highest_scoring_edge,]
# vv <- V(G)[as.numeric(ee[,2])]
# popsize <- V(G)[vv]$popsize
# dd <- data.frame(Landmark = ee[,1], popsize)
# dd <- ddply(dd, ~Landmark, function(x) {sum(x["popsize"])})
# dd <- cbind(dd, Percentage = dd$V1 / sum(dd$V1))
# names(dd) <- c("Landmark", "Cells", "Percentage")
# dd$Percentage <- signif(dd$Percentage * 100, digits = 4)
# # popsize <- V(G)[vv][["popsize"]]
# # vnames <- names(get.vertex.attribute(G))
# # View(vnames)
# # pops <- get.vertex.attribute(G)[grep("popsize", vnames, value = T)]
# # View(pops)
# return(dd)
# }
# get_cells_per_landmark_all_files <- function (sc.data)
# {
# cells <- lapply(c(1:length(sc.data$graphs)), function(i) {
# print(i)
# name <- names(sc.data$graphs)[i]
# cells <- get_number_of_cells_per_landmark(sc.data, name)
# return(cells)
# })
# m <- matrix(nrow = length(sc.data$graphs), ncol = nrow(cells[[1]])*2)
# rownames(m) <- names(sc.data$graphs)
# colnames(m) <- sapply(c(1:ncol(m)), function(i) {
# name <- as.vector(cells[[1]][,1])[ceiling(i/2)]
# if (gtools::odd(i) == TRUE) {name <- paste0(name, " #events")}
# else {name <- paste0(name, " %percentage")}
# return(name)
# })
# i <- 0
# j <- 0
# lapply(c(1:length(cells)), function(i) {
# lapply(c(1:nrow(cells[[i]])), function(j) {
# match <- pmatch(cells[[i]][j, ][[1]], as.vector(cells[[1]]$Landmark))
# m[i, (match*2)-1] <<- cells[[i]][j, ][[2]]
# m[i, (match*2)] <<- cells[[i]][j, ][[3]]
# })
# })
# m[is.na(m)] <- 0
# return(m)
# }
# get_summary_table <- function(sc.data, sel.graph, sel.nodes)
# {
# G <- sc.data$graphs[[sel.graph]]
# col.names <- get_numeric_vertex_attributes(sc.data, sel.graph)
# tab <- get.data.frame(G, what = "vertices")
# temp <-tab[tab$Label %in% sel.nodes,]
# ret <- temp[, col.names]
# ret <- rbind(ret, apply(ret, 2, median, na.rm = T))
# popsize <- data.frame(Cells = temp$popsize, Percentage = temp$popsize / sum(tab$popsize[tab$type == 2]))
# popsize <- rbind(popsize, colSums(popsize))
# ret <- cbind(popsize, ret)
# ret <- data.frame(Label = c(temp$Label, "Summary"), ret)
# ret$Percentage <- signif(ret$Percentage * 100, digits = 4)
# return(ret)
# }
# plot_cluster <- function(data, clusters, graph.name, col.names, pool.cluster.data, plot.type)
# {
# G <- data$graphs[[graph.name]]
# gated_data <- data$landmarks.data
# clustered_data <- data$clustered.data[[graph.name]]
# names(clustered_data) <- gsub("^X", "", names(clustered_data))
# names(gated_data) <- gsub("^X", "", names(gated_data))
# #This only works if the col.names are actually present in the clustered.data
# #TODO: figure out a consistent way to deal with panel mismatches
# common.names <- col.names[(col.names %in% names(clustered_data)) & (col.names %in% names(gated_data))]
# clustered_data <- clustered_data[, c(col.names, "cellType")]
# gated_data <- gated_data[, c(common.names, "cellType")]
# gated_data <- add_missing_columns(gated_data, col.names, fill.data = NA)
# #Select only the landmark nodes that are connected to these clusters
# land <- V(G)[nei(V(G)$Label %in% clusters)]$Label
# land <- V(G)[(V(G)$Label %in% land) & V(G)$type == 1]$Label
# temp <- gated_data[gated_data$cellType %in% land,]
# clus.num <- as.numeric(gsub("c", "", clusters))
# temp.clustered <- clustered_data[clustered_data$cellType %in% clus.num, ]
# if(pool.cluster.data)
# temp.clustered$cellType <- "Clusters"
# temp <- rbind(temp, temp.clustered)
# p <- NULL
# if(plot.type == "Scatterplot")
# {
# p <- density_scatterplot(temp, x_name = col.names[1], y_name = col.names[2], grouping = "cellType")
# }
# else
# {
# temp <- melt(temp, id.vars = "cellType")
# temp$variable <- as.factor(temp$variable)
# if(plot.type == "Density")
# {
# p <- ggplot(aes(x = value, color = cellType), data = temp) + geom_density() + facet_wrap(~variable, scales = "free")
# }
# else if(plot.type == "Boxplot")
# {
# p <- ggplot(aes(x = variable, fill = cellType, y = value), data = temp) + geom_boxplot()
# }
# }
# plot(p)
# return(p)
# }
# density_scatterplot <- function(tab, x_name, y_name, grouping)
# {
# m <- ddply(tab, grouping, function(m, x_name, y_name)
# {
# colramp <- grDevices::colorRampPalette(c("black", "red", "yellow"))
# dens.col <- grDevices::densCols(m[, x_name], m[, y_name], colramp = colramp)
# return(data.frame(m, dens.col = dens.col))
# }, x_name = x_name, y_name = y_name)
# maxx <- max(m[, x_name], na.rm = T) + 0.5
# maxy <- max(m[, y_name], na.rm = T) + 0.5
# (p <- ggplot(aes_string(x = x_name, y = y_name, color = "dens.col", size = 1), data = m)
# + facet_wrap(grouping)
# + geom_point()
# + scale_colour_identity()
# + scale_size_identity()
# + xlim(0, maxx)
# + ylim(0, maxy)
# )
# return(p)
# }
# export_clusters <- function(working.dir, sel.graph, sel.nodes)
# {
# d <- gsub(".txt$", ".all_events.RData", sel.graph)
# d <- file.path(working.dir, d)
# d <- my_load(d)
# clus <- as.numeric(gsub("c", "", sel.nodes))
# d <- d[d$cellType %in% clus,]
# f <- flowFrame(as.matrix(d))
# p <- sprintf("scaffold_export_%s_", gsub(".fcs.clustered.txt", "", sel.graph))
# outname <- tempfile(pattern = p, tmpdir = working.dir, fileext = ".fcs")
# # write.FCS(f, outname, delimiter="#")
# write.FCS(f, outname)
# }
# get_graph_centering_transform <- function(x, y, svg.width, svg.height)
# {
# padding <- 50
# G.width <- max(x) - min(x)
# G.height <- max(y) - min(y)
# scaling <- max(c(G.width / (svg.width - (padding * 2)), G.height / (svg.height - (padding * 2))))
# x <- x / scaling
# y <- y / scaling
# offset.y <- min(y) - padding
# graph.x.center <- (min(x) + max(x)) / 2
# offset.x <- graph.x.center - (svg.width / 2)
# return(list(offset.x = offset.x, offset.y = offset.y, scaling = scaling))
# }
# get_vertex_size <- function(sc.data, sel.graph, figure.width, node.size.attr, min.node.size, max.node.size, landmark.node.size)
# {
# G <- sc.data$graphs[[sel.graph]]
# size.attr <- get.vertex.attribute(G, node.size.attr)
# ret <- size.attr / sum(size.attr, na.rm = T)
# ret <- rescale_size(max.node.size, min.node.size, sc.data$dataset.statistics$max.marker.vals[["popsize.relative"]], ret)
# ret[V(G)$type == 1] <- landmark.node.size
# return(ret)
# }
# rescale_size <- function(max.size, min.size, max.val, x)
# {
# return(((max.size - min.size) * x) / max.val + min.size);
# }
# add_missing_columns <- function(m, col.names, fill.data)
# {
# v <- col.names[!(col.names %in% colnames(m))]
# print(sprintf("Adding missing columns: %s", paste(v, collapse = ", ")))
# ret <- matrix(nrow = nrow(m), ncol = length(v), data = fill.data)
# colnames(ret) <- v
# ret <- data.frame(m, ret, check.names = F)
# return(ret)
# }
# # 4 Mapping dataset ---------------------------------------------------------
# #################################################################################
# #Runs the existing analysis, mainly like the original code but with a few tricks due to how data are processed in this version.
# run_analysis_existing <- function(scaffold,
# refClusteredFiles, clusteredTables, outputDir, col.names.matrix, col.names.map,
# names.mapping = NULL, inter.cluster.connections, mode, names.map, col.names.inter_cluster,
# inter_cluster.weight_factor, overlap_method, ew_influence)
# {
# files.list <- clusteredTables
# print(sprintf("Markers used for SCAFFoLD: %s", paste(col.names.map, collapse = ", ")))
# print(paste("Using as reference", refClusteredFiles[1], sep = " "))
# ref.scaffold.file <- refClusteredFiles[2]
# ref.scaffold.data <- scaffold
# ref.scaffold.markers <- ref.scaffold.data$scaffold.col.names
# l <- load_existing_layout(ref.scaffold.data)
# tab.attractors <- l$tab.attractors
# G.attractors <- l$G.attractors
# att.labels <- V(G.attractors)$Label
# ret <- process_files(files.list, scaffold$G[[1]], G.attractors, tab.attractors, att.labels, col.names.matrix = col.names.matrix, col.names.gated = col.names.map,
# scaffold.mode = "existing", ref.scaffold.markers = ref.scaffold.markers, names.mapping = names.mapping)
# ret <- c(list(scaffold.col.names = col.names.map, landmarks.data = ref.scaffold.data$landmarks.data), ret)
# # if (mode == "Concatenation") {
# init <- length(ref.scaffold.data)
# retgraphs <- as.vector(ret$graphs)
# graphnames <- names(retgraphs)
# for (i in c(1:length(retgraphs))) {
# ref.scaffold.data$graphs[names(retgraphs)[i]] <- retgraphs[i]
# }
# ret <- ref.scaffold.data
# my_save(ret, paste(outputDir, sprintf("%s.scaffold", refClusteredFiles[1]), sep = "/"))
# # }
# # else {
# # my_save(ret, paste(outputDir, sprintf("%s.scaffold", refClusteredFiles[1]), sep = "/"))
# # }
# return(ret)
# }
# load_existing_layout <- function(scaffold.data)
# {
# G <- scaffold.data$graphs[[1]]
# G <- induced.subgraph(G, V(G)$type == 1, impl = "copy_and_delete")
# tab <- get_vertex_table(G)
# V(G)$name <- 1:vcount(G)
# return(list(G.attractors = G, tab.attractors = tab))
# }
# # 5 Export Scaffold ---------------------------------------------------------------------
# #################################################################################
# scaffold_pop_export <- function(scaffold.data){
# map <- scaffold.data
# all.table <- lapply(c(1:length(map$graphs)), function(i){
# G <- map$graphs[[i]]
# x <- V(G)$x
# y <- V(G)$y
# popsize <- V(G)$popsize
# if(is.null(popsize)){popsize <- (rep(1,length(x)))}
# tab <- data.frame(x,y,popsize)#,label)
# return(tab)
# })
# names(all.table) <- names(map$graphs)
# return(all.table)
# }
# scaffold_node_export <- function(scaffold.data){
# all.table <- scaffold_pop_export(scaffold.data)
# node.index.x <- which(V(scaffold.data$graphs[[1]])$type==1)
# table.node <- data.frame(all.table[[1]][node.index.x,c(1,2)])
# G <- scaffold.data$graphs[[1]]
# pop.name <- unlist(lapply(c(1:dim(table.node)[1]),function(x){return(names(G[[x]]))}))
# table.node <- cbind(pop.name, c(1:nrow(table.node)),table.node)
# colnames(table.node) <- c("pop Names","pop ID","Map.x","Map.y")
# return(table.node)
# }
# scaffold_cluster_export <- function(list1,list2,list.txt,scaffold.data){
# node.index.x <- which(V(scaffold.data$graphs[[1]])$type==1)
# all.table <- scaffold_pop_export(scaffold.data)
# multi.tables <- lapply(c(1:length(list1)), function(x){
# # print(x)
# d <- all.table[[list1[x]]]
# txt <- list.txt[[list2[x]]]
# table <- data.frame(d[-node.index.x,])
# tot <- sum(table[,"popsize"])
# # print(tot)
# temp <- as_data_frame(scaffold.data$graphs[[list1[x]]])
# pop <- temp[which(temp[,"edge_type"]=="highest_scoring"),"from"]
# # print(pop)
# table <- cbind(table,table[,3],pop,txt)
# table[,3] <- (table[,3]/tot)*100
# colnames(table) <- c("Scaffold.x","Scaffold.y","PercentOfTotal","Cluster.Size","pop",colnames(txt)) #clusterID,x,y,Events,Percentile,MFIs,
# return(table)
# })
# return(multi.tables)
# }
# scaffold_events_export <- function(list1, list2, list.flow.frames, scaffold.data, marker_e){
# landmark <- scaffold_node_export(scaffold.data)
# all.table <- scaffold_pop_export(scaffold.data)
# node.index.x <- which(V(scaffold.data$graphs[[1]])$type==1)
# flow.frames.celltype <- lapply(c(1:length(list1)),function(x){
# fcs <- list.flow.frames[[list2[x]]]
# d <- all.table[[list1[x]]]
# table <- get_matrix_from_fcs(1, list(fcs), "mean",marker_e)
# # print(table)
# tot <- sum(table[,"popsize"])
# temp <- as_data_frame(scaffold.data$graphs[[list1[x]]])
# pop <- temp[which(temp[,"edge_type"]=="highest_scoring"),"from"]
# table <- cbind(table,table[,"popsize"],pop)
# table[,dim(table)[2]-1] <- (as.numeric(table[,"popsize"])/tot)*100
# colnames(table)[dim(table)[2]-1] <- "PercentOfTotal"
# rdata <- fcs@exprs
# new_col.1 <- matrix(rdata[,marker_e], nrow = nrow(rdata), ncol = 1, dimnames = list(NULL, marker_e))
# new_col.2 <- as.vector(table[new_col.1,"pop"])
# pop.index <- unlist(lapply(new_col.2,function(j){
# ### QUICK DEBUG HERE################################################
# #if(length(grep(i,landmark[,"pop Names"]))>0){#######################
# return(landmark[which(j==landmark[,"pop Names"]),"pop ID"])
# #}###################################################################
# ####################################################################
# }))
# popIDscaffold <- as.matrix(as.numeric(pop.index))
# colnames(popIDscaffold) <- "popIDscaffold"
# # fcs.2 <- flowCore::cbind2(fcs, popIDscaffold)
# fcs.2 <- enrich.FCS.CIPHE(fcs, popIDscaffold)
# return(fcs.2)
# })
# return(flow.frames.celltype)
# }
# scaffold_pop_mfi <- function(list1, list2, list.flow.frames, scaffold.data, marker_e, methods="median"){
# landmark <- scaffold_node_export(scaffold.data)
# all.table <- scaffold_pop_export(scaffold.data)
# node.index.x <- which(V(scaffold.data$graphs[[1]])$type==1)
# table.mfi.pop <- lapply(c(1:length(list1)),function(x){
# fcs <- list.flow.frames[[list2[x]]]
# print(fcs)
# d <- all.table[[list1[x]]]
# table <- get_matrix_from_fcs(1, list(fcs), "mean",marker_e)
# tot <- sum(table[,"popsize"])
# temp <- as_data_frame(scaffold.data$graphs[[list1[x]]])
# pop <- temp[which(temp[,"edge_type"]=="highest_scoring"),"from"]
# table <- cbind(table,table[,"popsize"],pop)
# table[,dim(table)[2]-1] <- (as.numeric(table[,"popsize"])/tot)*100
# colnames(table)[dim(table)[2]-1] <- "PercentOfTotal"
# rdata <- fcs@exprs
# new_col.1 <- matrix(rdata[,marker_e], nrow = nrow(rdata), ncol = 1, dimnames = list(NULL, marker_e))
# new_col.2 <- as.vector(table[new_col.1,"pop"])
# pop.index <- unlist(lapply(new_col.2,function(j){return(landmark[which(j==landmark[,"pop Names"]),"pop ID"])}))
# popIDscaffold <- as.matrix(as.numeric(pop.index))
# colnames(popIDscaffold) <- "popIDscaffold"
# # fcs <- flowCore::cbind2(fcs, popIDscaffold)
# fcs <- enrich.FCS.CIPHE(fcs, popIDscaffold)
# res <- lapply(unique(fcs@exprs[,"popIDscaffold"]),function(y){
# mat <- fcs@exprs[which(fcs@exprs[,"popIDscaffold"]==y),]
# if(dim(mat)[1]< 2 || is.null(dim(mat))){return(c(0,0))}
# return(c(apply(mat,2,mean),dim(mat)[1]))
# })
# table.mfi <- do.call(rbind, res)
# colnames(table.mfi)[dim(table.mfi)[2]] <- "Count"
# return(table.mfi)
# })
# return(table.mfi.pop)
# }
# get.available.ram <- function()
# {
# available.ram <- 0
# if(Sys.info()[['sysname']] == "Windows")
# {
# available.ram <- system("wmic OS get FreePhysicalMemory /Value", intern=T)[3]
# available.ram <- strsplit(available.ram, "=", fixed = T)[[1]][[2]]
# available.ram <- as.numeric(strsplit(available.ram, "\r", fixed = T)[[1]][[1]])*1024
# }
# return(available.ram)
# }
# get.nmb.cores.max <- function(files.sizes, #Liste ou vecteur contenant les tailles des fichiers en bytes
# available.cores, #Nombre de coeursdisponibles
# x.cores = 0.5, #Coefficient de réduction du nombres de coeurs(ex: x.cores=0.5 ==> la moitié des coeurs utilisables)
# x.ram = 0.5, #Coefficient de réduction de la RAM
# correction.coef = 1, #Erreur relative au calcul de la mémoire par coeur.
# separate.by.files = T) #Un fichier par coeur si T; Tous les coeurs utilisent tous les fichiers si F
# #correction.coef = 1 ==> pas de changement
# #correction.coef = 1.2 ==> prévoir une utilisation 20% plus importante de ram par coeur
# {
# available.ram <- get.available.ram()
# max.size <- 0
# Nk <- 0
# if(separate.by.files)
# {
# max.size <- max(as.numeric(unlist(files.sizes)))
# for (k in 1:as.integer(available.cores*x.cores))
# {
# mean.size <- length(unlist(files.sizes)) * (max.size) +
# (3.309e+07 + 1.584*max.size) * k
# if( (mean.size * correction.coef) <= available.ram*x.ram )
# {
# Nk <- Nk + 1
# }
# }
# }
# else
# {
# for(i in 1:length(files.sizes))
# {
# max.size <- max.size + files.sizes[[i]]
# max.size <- max.size + 3.309e+07 + 1.584*files.sizes[[i]] #Clara ram peak: p = 3.3e7 + 1.6 * file_size
# }
# print(max.size/1024/1024)
# for (k in 1:as.integer(available.cores*x.cores))
# {
# if(k*max.size*correction.coef <= available.ram*x.ram)
# {
# Nk <- Nk + 1
# }
# }
# }
# return(Nk)
# }
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.