knitr::opts_chunk$set(echo=FALSE, warning=FALSE, message=FALSE)
colnames(comped_nnls_exprs) #the compensated data flowframe: comped_nnls, exprs: comped_nnls_exprs cols_metal <- c("Pr141Di", "Nd142Di", "Nd143Di", "Nd144Di", "Nd145Di", "Nd146Di", "Sm147Di", "Nd148Di", "Sm149Di", "Nd150Di", "Eu151Di", "Sm152Di", "Eu153Di", "Sm154Di", "Gd155Di", "Gd156Di", "Gd158Di", "Tb159Di", "Gd160Di", "Dy161Di")
#get_spill_cols from CATALYST package get_spill_cols <- function(ms, mets, l=CATALYST::isotope_list) { ms <- as.numeric(ms) spill_cols <- vector("list", length(ms)) for (i in seq_along(ms)) { p1 <- m1 <- ox <- iso <- NULL if ((ms[i] + 1) %in% ms) p1 <- which(ms == (ms[i] + 1)) if ((ms[i] - 1) %in% ms) m1 <- which(ms == (ms[i] - 1)) if ((ms[i] + 16) %in% ms) ox <- which(ms == (ms[i] + 16)) iso <- l[[mets[i]]] iso <- which(ms %in% iso[iso != ms[i]]) spill_cols[[i]] <- unique(c(m1, p1, iso, ox)) } spill_cols } spill_cols_data <- function(data){ ##get ms and mets chs <- colnames(data) ####metal mass number like 167,*** ms <- as.numeric(regmatches(chs, gregexpr("[0-9]+", chs))) ####metal name mets <- gsub("[[:digit:]]+Di", "", chs) ##get spillover cols spill_cols <- get_spill_cols(ms, mets) return(spill_cols) } #function generate a simulated spillover matrix sm_simulation <- function(data, n, ...){ ##get ms and mets chs <- colnames(data) ####metal mass number like 167,*** ms <- as.numeric(regmatches(chs, gregexpr("[0-9]+", chs))) ####metal name mets <- gsub("[[:digit:]]+Di", "", chs) ##get spillover cols spill_cols <- get_spill_cols(ms, mets) sm <- vector("list",n) for (i in 1:n){ m = diag(length(spill_cols)) for (j in seq_along(spill_cols)){ for (k in spill_cols[[j]]){ m[j,k] <- runif(1, min=0, max=0.1) } } sm[[i]] <- m } return(sm) } #function generate simulated data data_simulation <- function(signal, sm, ...){ data <- vector("list",length(sm)) for (i in seq_along(sm)){ data[[i]] = signal%*%sm[[i]] colnames(data[[i]]) = colnames(signal) } return(data) } simulate_ture_signal <- function(){ set.seed(1) # single modal channel single <- sort(sample(c(1:20), 10)) double <- c(1:20)[-single] mean_single <- runif(10, min = 100, max = 600) mean_double1 <- runif(10, min = 100, max = 600) mean_double2 <- runif(10, min = 100, max = 600) data <- matrix(nrow = 100000, ncol = 20) j = 1 for (i in single){ data[,i] <- sample(c(rnorm(60000, mean = mean_single[j], sd = mean_single[j]/5), rep(0, 40000)), 100000) j = j + 1 } j = 1 for (i in double){ data[,i] <- sample(c(rnorm(30000, mean = mean_double1[j], sd = mean_double1[j]/5), rnorm(30000, mean = mean_double2[j], sd = mean_double2[j]/5), rep(0, 40000)), 100000) j = j + 1 } colnames(data) <- cols_metal return(data) }
library(tensorflow) tf$compat$v1$disable_v2_behavior() get_H_mask <- function(spill_cols){ n_cols <- length(spill_cols) H_mask <- matrix(0, n_cols, n_cols) for (i in 1:n_cols){ if (!is.null(spill_cols[[i]])){ for (j in spill_cols[[i]]){ H_mask[i,j] = 1 } } } return(H_mask) } get_A_mask <- function(data, cutoffs){ A_mask <- matrix(0, dim(data)[1], dim(data)[2]) for (i in 1:dim(data)[2]){ A_mask[which(data[,i] < cutoffs[i]), i] = 1 A_mask[which(data[,i] == 0), i] = 0 } return(A_mask) } train_NMF <- function(data, spill_cols, cutoffs){ #down sampling if necessary # data <- data[sample(nrow(data),5000),] tf$compat$v1$disable_v2_behavior() #get masks for A and H A_mask <- get_A_mask(data, cutoffs) H_mask <- get_H_mask(spill_cols) A = tf$constant(data) shape = dim(data) #initializing W,H # temp_H = matrix(rnorm(shape[2]*shape[2]),ncol=shape[2]) # temp_H = temp_H/max(temp_H) # # temp_W = matrix(rnorm(shape[1]*shape[2]),ncol=shape[2]) # temp_W = temp_W/max(temp_W) # temp_H = matrix(0.05, shape[2], shape[2]) * H_mask temp_H = matrix(runif(shape[2]*shape[2],min=0,max=0.1), shape[2], shape[2]) * H_mask temp_W = data - data*A_mask # W = tf$Variable(temp_W) W = tf$constant(temp_W) H = tf$Variable(temp_H) A_mask = tf$constant(A_mask) H_mask = tf$constant(H_mask) H_masked = tf$multiply(H,H_mask) WH = tf$matmul(W, H_masked) # WH = tf$matmul(W, H) #cost of Frobenius norm # cost = tf$reduce_sum(tf$pow(tf$multiply(A,A_mask) - tf$multiply(WH,A_mask) - tf$multiply(W,A_mask), 2)) cost = tf$reduce_sum(tf$multiply(tf$pow(A - WH - W, 2),A_mask)) # cost = tf$reduce_sum(tf$norm(tf$multiply(A,A_mask) - tf$multiply(WH,A_mask))) # Learning rate lr = 0.001 # Number of steps steps = 1000 # train_step = tf$train$GradientDescentOptimizer(lr)$minimize(cost) train_step = tf$compat$v1$train$GradientDescentOptimizer(lr)$minimize(cost) # Clipping operation. This ensure that W and H learnt are non-negative # clip_W = W$assign(tf$maximum(tf$zeros_like(W), W)) clip_H1 = H$assign(tf$maximum(tf$zeros_like(H), H)) H_clip2 <- matrix(0.1, shape[2], shape[2]) clip_H2 = H$assign(tf$minimum(H_clip2, H)) # clip = tf$group(clip_W, clip_H) # clip = tf$group(clip_W,clip_H1, clip_H2) clip = tf$group(clip_H1, clip_H2) # Launch the graph and initialize the variables. sess = tf$compat$v1$Session() sess$run(tf$compat$v1$global_variables_initializer()) for (step in 1:steps) { sess$run(train_step) sess$run(clip) if (step %% 100 == 0){ cat("\nCost:",sess$run(cost), "\n") print("********************************") } } # # learnt_W = sess$run(W) # learnt_H = sess$run(H) # learnt_H_masked = learnt_H*get_H_mask(spill_cols) learnt_H_masked = sess$run(H_masked) return(learnt_H_masked) }
set.seed(1) true_signal <- simulate_ture_signal() sm_simulated <- sm_simulation(data = true_signal, n = 20) data_simulated <- data_simulation(signal = true_signal, sm = sm_simulated)
library(CytoSpill) estimate_sm <- function(data, spill_cols){ my_sm = vector("list",length(data)) my_sm_nmf = vector("list",length(data)) for (i in seq_along(data)){ cutoffs_sim <- .DeriveCutoffs(data=data[[i]], cols=c(1:ncol(data[[i]])), n = 10000, flexrep = 5) threshold <- 0.1 model <- .EstimateSpill(data=data[[i]], cutoffs=cutoffs_sim, cols=c(1:ncol(data[[i]])), upperbound = threshold, neighbor = 1) estimates <- model[[1]] xcols <- model[[2]] spillmat <- diag(length(xcols)) for (m in 1:length(xcols)) { if (!is.na(xcols[[m]][1])) { for (j in 1:length(xcols[[m]])) { if (!is.na(estimates[[m]][j])){ spillmat[xcols[[m]][j],m] <- min(estimates[[m]][j],threshold) } } } } sm_sim <- spillmat my_sm[[i]] <- sm_sim nmf_sim <- train_NMF(data[[i]], spill_cols, cutoffs = cutoffs_sim) my_sm_nmf[[i]] <- nmf_sim } return(list(my_sm, my_sm_nmf)) } # estimate_sm_nmf <- function(data, spill_cols) { # my_sm_nmf = vector("list",length(data)) # for (i in seq_along(data)){ # cutoffs_sim <- .DeriveCutoffs(data=data[[i]], cols=c(1:ncol(data[[i]])), n = 5000) # nmf_sim <- train_NMF(data[[i]], spill_cols, cutoffs = cutoffs_sim) # my_sm_nmf[[i]] <- nmf_sim # } # return(my_sm_nmf) # } set.seed(1) estimate_sm_simulated <- estimate_sm(data = data_simulated, spill_cols = spill_cols_data(true_signal)) ##simulated_results.Rdata
#number of simulated elements in each spillover matrix spill_cols <- spill_cols_data(true_signal) sum(sapply(spill_cols, length))
#trucate nmf estimated spillover effects with >0.1 or <0 value, also add diagnal to make spillover matrix, comparable to sqp results nmf_estimate_sm <- estimate_sm_simulated[[2]] for (i in seq_along(nmf_estimate_sm)){ nmf_estimate_sm[[i]][which(nmf_estimate_sm[[i]]<0)] <- 0 nmf_estimate_sm[[i]][which(nmf_estimate_sm[[i]]>0.1)] <- 0.1 nmf_estimate_sm[[i]] <- nmf_estimate_sm[[i]] + diag(20) }
library(ggplot2) sm_comparison <- function(x, y, spill_cols){ # errorlog = NULL comparison = NULL # errorpoint = NULL for (i in seq_along(x)){ single <- NULL # errorcount = 0 # errorcount2 = 0 for (j in seq_along(spill_cols)){ for (k in spill_cols[[j]]){ single <- rbind(single,c(x[[i]][j,k],y[[i]][j,k])) # if (y[[i]][j,k]>0.049 & y[[i]][j,k]<0.05 ) { # errorcount = errorcount+1 # errorpoint = rbind(errorpoint, c(j,k)) # } # if (y[[i]][j,k]==0) errorcount2 = errorcount2+1 } } # errorlog <- rbind(errorlog,c(errorcount,errorcount2)) comparison <- rbind(comparison,single) } colnames(comparison) <- c('simulation','my_method') # return(list(comparison,errorlog,errorpoint)) return(comparison) } slsqp_comparison <- sm_comparison(sm_simulated,estimate_sm_simulated[[1]],spill_cols = spill_cols) nmf_comparison <- sm_comparison(sm_simulated,nmf_estimate_sm,spill_cols = spill_cols) ggplot(as.data.frame(slsqp_comparison), aes(x=my_method, y=simulation)) + geom_point(size=0.1) + theme_classic() + xlab('Sequential quadratic programming estimated spillover effects') + ylab('Simulated spillover effects') + coord_fixed() ggplot(as.data.frame(nmf_comparison), aes(x=my_method, y=simulation)) + geom_point(size=0.1) + theme_classic() + xlab('NMF estimated spillover effects') + ylab('Simulated spillover effects') + coord_fixed()
cor(slsqp_comparison[,1],slsqp_comparison[,2])^2 cor(nmf_comparison[,1],nmf_comparison[,2])^2
sessionInfo()
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.