############################################################################################################################
#### This Scipt describes an example of running a dataset to impute values
#### based on KNN-CR, KNN-TN and KNN-EU
############################################################################################################################
source("Trunc_KNN/Imput_funcs.r")
source("Trunc_KNN/Simulation.r")
### Load libraries as some functions are dependent on them
library(MASS)
library(mvtnorm)
library(Matrix)
library(magrittr)
###########################################################################
##### Here we Simulate a dataset of size 50 Samples by 400 Metabolites
##### We will have a Complete Dataset and a Dataset with Missing Values
##### For this example we generate a Dataset with 15% total Missing where
##### 10% of the Data is missing below the Threshold and 5% missing above
##### the threshold. We also discard if there is missing greater than 75%.
############################################################################
set.seed(505)
CorrelationMatrix0.7 <- CorrMatrixNegFixed(20, 20, 0.7, 0.2)
DataSimul <- SimulatedData(50, 400, as.matrix(nearPD(CorrelationMatrix0.7)[[1]]), low = -5, high =5)
## Simulate 15% overall missing, 10% due to below LOD (MNAR), 5% MAR
## Only retain metabolites with <75% MVs
Missingness <- MissingData(DataSimul, 15, 5, 0.75)
CompleteData <- Missingness[[2]]
MissData <- Missingness[[1]]
dim(MissData) ## 50x390; After Screening we have 50 Samples and 390 Metabolites
sum(is.na(MissData)/length(MissData)) ## 13.5%, below 15% due to screening
LOD <- quantile(DataSimul, probs = 0.1)
##################################################################################
### We now do the different imputations and we provide the type of imputation and
### k neighbors
##################################################################################
(kNN_Corr_Imp <- imputeKNN(t(MissData), k=10 , distance = "correlation"))
(kNN_Euc_Imp <- KNNEuc(t(MissData), k=10))
(kNN_Trunc_Imp <- imputeKNN(t(MissData), k=10 , distance = "truncation", perc= 0.75))
###################################################################################
### We compute the RMSE since we have the CompleteData, MissingData and ImputedData
####################################################################################
RMSError <- ErrorsComputation(trunc=kNN_Trunc_Imp, corr=kNN_Corr_Imp, euc=kNN_Euc_Imp,
miss=MissData, complete = CompleteData)
names(RMSError) <- c("KNN-TN", "KNN-CR", "KNN-EU")
RMSError
## KNN-TN KNN-CR KNN-EU
## 1.161180 1.345218 1.606502
######################################################################################
### We look at the Distribution of Metabolites based on the Imputation Method
######################################################################################
## This plots the first 20 Metabolites of the Dataset and overlays the imputed values
## from the different methods in different colors
## Reproduces Figure 6 in the manuscript
dim(MissData)
sum(is.na(MissData[,1:20])) ## 190
png("Figure6.png", width = 7, height = 7, units = 'in', res = 600)
col.black <- rgb(0,0,0,alpha=20,maxColorValue=255)
col.black2 <- rgb(0,0,0,alpha=255,maxColorValue=255)
col.blue <- rgb(0,0,255,alpha=200,maxColorValue=255)
col.red <- rgb(255,0,0,alpha=200,maxColorValue=255)
col.green <- rgb(0,255,0,alpha=200,maxColorValue=255)
idx.na <- which(is.na(MissData[,1:20]))
plot(rep(1:20, each = 50)[-idx.na], CompleteData[,1:20][-idx.na],
xlab = "Metabolite", ylab = "Intensity Values", xlim=c(0, 20.5),
pch = 1, ylim = c(-8,8), col = col.black, cex = 0.9) ##, xaxt='n')
## Just use 'points' to add to a plot rather than the 'plot' command
points(rep(1:20, each = 50)[idx.na], CompleteData[,1:20][idx.na],
pch = 8, col = col.black2, cex = 0.9)
## Need to make these colors more transparent so we can see all of them
## Shift these slightly to the right when plotting ...
points(rep(1:20, each = 50)[idx.na]+0.23, t(kNN_Trunc_Imp[1:20,])[idx.na],
pch=17, ylim = c(-8,8), col = col.blue, cex = 0.9) ## Trun
points(rep(1:20, each = 50)[idx.na]+0.46, t(kNN_Corr_Imp[1:20,])[idx.na],
pch=15, ylim = c(-8,8),col = col.red, cex = 0.9) ## Corr
points(rep(1:20, each = 50)[idx.na]+0.7, t(kNN_Euc_Imp[1:20,])[idx.na],
pch=18, ylim = c(-8,8),col = col.green, cex = 0.9) ## EUC
legend("topleft", c("Original-Observed", "Orginal-Missing", "KNN-TN", "KNN-CR", "KNN-EU"),
pch = c(1, 8, 17, 15, 18), col = c(1, 1, 4, 2, 3), bty = "n",
text.col = c(1, 1, 4, 2, 3), pt.cex = 1)
## Color region below LOD light red
col.red2 <- rgb(255,0,0,alpha=25,maxColorValue=255)
lims <- par("usr")
polygon(x = c(lims[1], lims[2], lims[2], lims[1]), y = c(LOD, LOD, lims[3], lims[3]), col = col.red2,
border = NA)
dev.off()
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.