#' Variable importance plots (VIP)
#'
#' Makes variable importance plots (VIP) of partial least squares - discriminant analysis (PLS-DA), also displays score plots and biplots.
#' @param data Data table with variables (metabolites) in columns. Samples in rows are sorted according to specific groups.
#' @param name A character string or expression indicating a name of data set. It occurs in names of every output.
#' @param groupnames A character vector defining specific groups in data. Every string must be specific for each group and they must not overlap.
#' @param tsf Data transformation must be defined by "clr" (default), "log", "log10", "PQN", "lnPQN", "pareto" or "none". See Details.
#' @param top How many most important variables should be in zoomed VIP plot and biplot? The default is 30.
#' @param QCs logical. If FALSE (default) quality control samples (QCs) are automatically distinguished and skipped.
#' @details Data transformation: with "clr" clr trasformation is used (see References), with "log" natural logarithm is used, with "log10" decadic logarithm is used, with "pareto" data are only scaled by Pareto scaling, with "PQN" probabilistic quotient normalization is done, with "lnPQN" natural logarithm of PQN transformed data is done, with "none" no tranformation is done.
#' @details VIP plot can be used only for comparison of two groups. If there is more groups in data, all possible combinations of pairs are evaluated.
#' @details Up to twenty different groups can be distinguished in data (including QCs).
#' @return VIP plots, score plots and biplots of PLS-DA.
#' @return Excel file with mean, sd and angles of rays from group center for every variable.
#' @import car
#' @importFrom openxlsx write.xlsx
#' @importFrom stats sd quantile
#' @importFrom robCompositions cenLR
#' @references Aitchison, J. (1986) The Statistical Analysis of Compositional Data Monographs on Statistics and Applied Probability. Chapman & Hall Ltd., London (UK). p. 416.
#' @references Wold, S. et al. (2001) PLS-regression: a basic tool of chemometrics, CHEMOLAB 58 (2), p. 109-130.
#' @references Chong, I.G., Jun, C.H. (2005) Performance of some variable selection methods when multicollinearity is present, CHEMOLAB 78, p. 103-112.
#' @examples data=metabol
#' name="Metabolomics" #name of the project
#' groupnames=c("Con","Pat","QC")
#' GraphsVIP(data,name,groupnames)
#' @export
GraphsVIP=function(data,name,groupnames,tsf="clr",top=30,QCs=FALSE){
############################################################################################
dirout=getwd()
dirout2 = paste(dirout,"/","PLSDA",sep = "")
dir.create(dirout2, recursive=TRUE, showWarnings = FALSE) # vytvori v zadane ceste slozku s nazvem "note"
setwd(dirout2)
##########################################################################################################################
if (tsf=="clr"){
dataM=cenLR(data)$x.clr
}
if (tsf=="log"){
dataM=log(data)
}
if (tsf=="log10"){
dataM=log10(data)
}
if (tsf=="pareto"){
dataM=scale(data, scale=TRUE, center=FALSE)
}
PQN1=function (x){
xref=apply(x,2,median)
podil=x
for (i in 1:ncol(x)){
podil[,i]=x[,i]/xref[i]
}
s=apply(podil,1,median)
PQN=x
for (j in 1:nrow(x)){
PQN[j,]=x[j,]/s[j]
}
return(PQN)
}
if (tsf=="PQN"){
dataM=PQN1(data)
}
if (tsf=="lnPQN"){
dataM=log(PQN1(data))
}
if (tsf=="none"){
dataM=data
}
##########################################################################################################################
basecolor=c("blue","magenta","forestgreen","darkorange","deepskyblue","mediumaquamarine","lightslateblue","saddlebrown",
"gray40","darkslateblue","firebrick","darkcyan","darkmagenta", "deeppink1","limegreen","gold2","bisque2",
"lightcyan3","red","darkolivegreen3") # Basic colours from: http://www.stat.columbia.edu/~tzheng/files/Rcolor.pdf
basemarks=c(15,17,18,8,11,2,0,16,5,6,4,10,3,7,9,12)
count=length(groupnames)
Group=matrix(rep(NA,count*3),ncol=3)
colnames(Group)=c("min","max","length")
rownames(Group)=groupnames
groups=NULL
marks=NULL
color=NULL
for (i in 1:count){
Gr=grep(groupnames[i],rownames(dataM))
Group[i,1]=min(Gr)
Group[i,2]=max(Gr)
Group[i,3]=length(Gr)
gr=rep(i,length(Gr))
groups=c(groups,gr)
zn=rep(basemarks[i],length(Gr))
marks=c(marks,zn)
cl=rep(basecolor[i],length(Gr))
color=c(color,cl)
}
##########################################################################################################################
if (QCs==FALSE){
QC=grep("QC",rownames(dataM))
if (length(QC)!=0){
dataM=dataM[-QC,]
color=color[-QC]
marks=marks[-QC]
groups=groups[-QC]
groupnames=groupnames[unique(groups)]
count=length(groupnames)
Group=matrix(rep(NA,count*3),ncol=3)
colnames(Group)=c("min","max","length")
rownames(Group)=groupnames
for (i in 1:count){
Gr=grep(groupnames[i],rownames(dataM))
Group[i,1]=min(Gr)
Group[i,2]=max(Gr)
Group[i,3]=length(Gr)
}
}
}
############################################################################################
#VIP
VIP <- function(object) {
if (object$method != "oscorespls")
stop("Only implemented for orthogonal scores algorithm. Refit with 'method = \"oscorespls\"'")
if (nrow(object$Yloadings) > 1)
stop("Only implemented for single-response models")
SS <- c(object$Yloadings)^2 * colSums(object$scores^2)
Wnorm2 <- colSums(object$loading.weights^2)
SSW <- sweep(object$loading.weights^2, 2, SS / Wnorm2, "*")
sqrt(nrow(SSW) * apply(SSW, 1, cumsum) / cumsum(SS))
}
############################################################################################
"biplot.color" <-
function (x, y, var.axes = TRUE, col, cex = rep(par("cex"), 2),
xlabs = NULL, ylabs = NULL, expand = 1, xlim = NULL, ylim = NULL,
arrow.len = 0.1, pch=marks, pch.cex=1, xlabMY, ylabMY, ...)
{
n <- nrow(x)
p <- nrow(y)
if (is.null(xlabs)) {
xlabs <- rownames(x)
if (is.null(xlabs))
xlabs <- 1:n
}
xlabs <- as.character(xlabs)
dimnames(x) <- list(xlabs, dimnames(x)[[2]])
if (missing(ylabs)) {
ylabs <- dimnames(y)[[1]]
if (is.null(ylabs))
ylabs <- paste("Var", 1:p)
}
ylabs <- as.character(ylabs)
dimnames(y) <- list(ylabs, dimnames(y)[[2]])
if (length(cex) == 1)
cex <- c(cex, cex)
if (missing(col)) {
col <- par("col")
if (!is.numeric(col))
col <- match(col, palette())
col <- c(col, col + 1)
}
else if (length(col) == 1)
col <- c(col, col)
unsigned.range <- function(x) c(-abs(min(x)), abs(max(x)))
rangx1 <- unsigned.range(x[, 1])
rangx2 <- unsigned.range(x[, 2])
rangy1 <- unsigned.range(y[, 1])
rangy2 <- unsigned.range(y[, 2])
if (missing(xlim) && missing(ylim))
xlim <- ylim <- rangx1 <- rangx2 <- range(rangx1, rangx2)
else if (missing(xlim))
xlim <- rangx1
else ylim <- rangx2
ratio <- max(rangy1/rangx1, rangy2/rangx2)/expand
on.exit(par(oldpar))
oldpar <- par(pty = "s")
######################
# Plot-Symbol definiert
plot(x, type = "p", xlim = xlim, ylim = ylim, col = col,pch=pch,
cex=pch.cex, xlab = xlabMY, ylab = ylabMY, ...)
# plot(x, type = "n", xlim = xlim, ylim = ylim, col = col, ...)
######################
# kein Text bei Objekten
text(x, xlabs, cex = cex[1], col = col,pos=3, ...)
par(new = TRUE)
plot(y, axes = FALSE, type = "n", xlim = xlim * ratio, ylim = ylim *
ratio, xlab = "", ylab = "", col = col, ...)
axis(3, col = 1)
axis(4, col = 1)
box(col = 1)
######################
# Text der Variablen
text(y, labels = ylabs, cex = cex[2], col = "red", ...)
if (var.axes)
######################
# Farbe der Variablen-Pfeile
arrows(0, 0, y[, 1] * 0.8, y[, 2] * 0.8, col = "red",
length = arrow.len)
invisible()
}
"biplot.color_transp" <-
function (x, y, var.axes = TRUE, col, cex = rep(par("cex"), 2),
xlabs = NULL, ylabs = NULL, expand = 1, xlim = NULL, ylim = NULL,
arrow.len = 0.1, pch=marks, pch.cex=1, xlabMY, ylabMY, ...)
{
n <- nrow(x)
p <- nrow(y)
if (is.null(xlabs)) {
xlabs <- rownames(x)
if (is.null(xlabs))
xlabs <- 1:n
}
xlabs <- as.character(xlabs)
dimnames(x) <- list(xlabs, dimnames(x)[[2]])
if (missing(ylabs)) {
ylabs <- dimnames(y)[[1]]
if (is.null(ylabs))
ylabs <- paste("Var", 1:p)
}
ylabs <- as.character(ylabs)
dimnames(y) <- list(ylabs, dimnames(y)[[2]])
if (length(cex) == 1)
cex <- c(cex, cex)
if (missing(col)) {
col <- par("col")
if (!is.numeric(col))
col <- match(col, palette())
col <- c(col, col + 1)
}
else if (length(col) == 1)
col <- c(col, col)
unsigned.range <- function(x) c(-abs(min(x)), abs(max(x)))
rangx1 <- unsigned.range(x[, 1])
rangx2 <- unsigned.range(x[, 2])
rangy1 <- unsigned.range(y[, 1])
rangy2 <- unsigned.range(y[, 2])
if (missing(xlim) && missing(ylim))
xlim <- ylim <- rangx1 <- rangx2 <- range(rangx1, rangx2)
else if (missing(xlim))
xlim <- rangx1
else ylim <- rangx2
ratio <- max(rangy1/rangx1, rangy2/rangx2)/expand
on.exit(par(oldpar))
oldpar <- par(pty = "s")
######################
# Plot-Symbol definiert
plot(x, type = "p", xlim = xlim, ylim = ylim, col = col,pch=pch,
cex=pch.cex, xlab = xlabMY, ylab = ylabMY, ...)
# plot(x, type = "n", xlim = xlim, ylim = ylim, col = col, ...)
######################
# kein Text bei Objekten
text(x, xlabs, cex = cex[1], col = col,pos=3, ...)
par(new = TRUE)
plot(y, axes = FALSE, type = "n", xlim = xlim * ratio, ylim = ylim *
ratio, xlab = "", ylab = "", col = col, ...)
axis(3, col = 1)
axis(4, col = 1)
box(col = 1)
######################
# Text der Variablen
text(y, labels = ylabs, cex = cex[2], col = "transparent", ...)
if (var.axes)
######################
# Farbe der Variablen-Pfeile
arrows(0, 0, y[, 1] * 0.8, y[, 2] * 0.8, col = "transparent",
length = arrow.len)
invisible()
}
############################################################################################
for (i in 1:(count-1)){
for (j in (i+1):count){
PDF2=paste("VIP_all_",name,"_",groupnames[i],"_",groupnames[j],".pdf",sep="")
PDF3=paste("VIP_top_",name,"_",groupnames[i],"_",groupnames[j],".pdf",sep="")
PDF4=paste("PLSDA_",name,"_",groupnames[i],"_",groupnames[j],".pdf",sep="")
vyber=c(Group[i,1]:Group[i,2],Group[j,1]:Group[j,2])
groups2=c(rep(0,Group[i,3]),rep(1,Group[j,3]))
lc=Group[i,3]
lp=Group[j,3]
datanew=dataM[vyber,]
dataSnew=scale(datanew, scale=FALSE, center=TRUE)
y1=rep(1,lc)
y2=rep(0,lp)
yyd=c(y1,y2)
color2=color[vyber]
marks2=marks[vyber]
groups2=c(rep(1,lc),rep(2,lp))
groupnames2=groupnames[c(i,j)]
############################################################################################
#The distance of rays from the center:
ys=as.matrix(scale(yyd,scale=FALSE))
resst2 <- mvr(ys~dataSnew,ncomp=2,method="simpls")
G=resst2$scores
H=resst2$loadings
rownames(G)=rownames(dataSnew)
lengthx=matrix(rep(0,nrow(H)),nrow=nrow(H))
for(k in 1:nrow(H)){
for(l in 1:nrow(H)){
a=abs(H[k,1])
b=abs(H[k,2])
lengthx[k]=sqrt(a^2+b^2)
rownames(lengthx)=rownames(H)
}}
discrim=as.matrix(lengthx[order(lengthx[,1],decreasing = TRUE),])
matrixH=cbind(lengthx,H)
matrixH=matrixH[order(matrixH[,1],decreasing = TRUE),]
matrixH=matrixH[,-1]
sc1=quantile(G[1:lc,1],0.5)
sc2=quantile(G[1:lc,2],0.5)
sc=c(sc1,sc2)
sp1=quantile(G[(lc+1):(lc+lp),1],0.5)
sp2=quantile(G[(lc+1):(lc+lp),2],0.5)
sp=c(sp1,sp2)
colorc=unique(color2)[1]
colorp=unique(color2)[2]
theta <- function(x1, x2) acos( sum(x1*x2) / ( sqrt(sum(x1 * x1)) * sqrt(sum(x2 * x2)) ) )
uhel=matrix(rep(0,nrow(H)*2),ncol=2)
rownames(uhel)=rownames(H)
colnames(uhel)=c(groupnames2[1],groupnames2[2])
for(m in 1:nrow(H)){
uhel[m,1]=theta(sc,H[m,])
uhel[m,2]=theta(sp,H[m,])
}
###############################################################
ys=as.matrix(scale(yyd,scale=FALSE))
if (length(ys)<=10){ncompp=length(ys)-1
} else if (ncol(dataSnew)<10) {ncompp=ncol(dataSnew)-1
} else {ncompp=10
}
resst <- mvr(ys~dataSnew,ncomp=ncompp,method="oscorespls")
msep=MSEP(resst,intercept=FALSE)
comp=which.min(msep$val[1,,])
if (comp>(ncompp-2)){comp=ncompp-2
} else {
comp=comp
}
###############################################################
vip=VIP(resst)[comp,]
vipmean=matrix(vip,ncol=ncol(dataSnew))
colnames(vipmean)=colnames(dataSnew)
ycon=rep(1,length(which(yyd==1)))
ycon2=rep(0,length(which(yyd==0)))
BE=matrix(rep(NA,length(ycon)*length(ycon2)*ncol(dataSnew)),ncol=ncol(dataSnew))
#jackknife for standard deviation estimation
k=1
for(m in 1:length(ycon)){
for(n in 1:length(ycon2)){
testX=dataSnew[-c(m,length(ycon)+n),]
testy=c(ycon[-m],ycon2[-n])
testys=as.matrix(scale(testy,scale=FALSE))
resst <- mvr(testys~testX,ncomp=comp,method="oscorespls")
BE[k,]=VIP(resst)[comp,]
k=k+1
}
}
sdbeta=matrix(apply(BE,2,sd),ncol=ncol(dataSnew))
matice=rbind(vipmean,sdbeta,t(uhel))
matice=matice[,order(matice[1,],decreasing =TRUE)]
vipmean=matice[1,]
sdbeta=matice[2,]
uhelcon=matice[3,]
uhelpac=matice[4,]
maxy=max(vipmean)+1.75*max(sdbeta)
ccolor=rep(0,ncol(matice))
for(l in 1:ncol(matice)){
if (uhelcon[l]<uhelpac[l]){ccolor[l]=colorc}
else ccolor[l]=colorp
}
pdf((PDF2),height=10,width=round((length(colnames(data))/8),digits = 0))
bar=barplot(vipmean,ylim=c(0,maxy),main=paste("VIP plot - ",name, sep=""), ylab="VIP",col=ccolor,cex.names=0.75,las=2)
segments(bar, vipmean - sdbeta, bar, vipmean + sdbeta, lwd=1)
segments(bar - 0.1, vipmean - sdbeta, bar + 0.1, vipmean - sdbeta, lwd=1)
segments(bar - 0.1, vipmean + sdbeta, bar + 0.1, vipmean + sdbeta, lwd=1)
legend("topright",legend = groupnames2, pch = 16, col = unique(color2))
dev.off()
rownames(matice)=c("Mean","Sd",paste("Angle_",groupnames[i],sep=""),paste("Angle_",groupnames[j],sep=""))
write.xlsx(t(matice),file = paste("VIP_",name,"_",groupnames[i],"_",groupnames[j],".xlsx", sep=""),
sheetName=paste(groupnames[i],"_",groupnames[j],sep=""),
col.names=TRUE, row.names=TRUE, append=FALSE, showNA=TRUE)
vipmean=vipmean[1:top]
sdbeta=sdbeta[1:top]
pdf((PDF3),width=15,height=10)
par(mar=c(11,4.5,1,0))
bar=barplot(vipmean,width=rep(0.25,8),xlim=c(0,10),ylim=c(0,maxy),ylab="VIP",col=ccolor[1:top],cex.names=1.2,las=2,main="VIP score",cex.axis=1.5,cex=1.5,cex.lab=1.5)
segments(bar, vipmean - sdbeta, bar, vipmean + sdbeta, lwd=1)
segments(bar - 0.1, vipmean - sdbeta, bar + 0.1, vipmean - sdbeta, lwd=1)
segments(bar - 0.1, vipmean + sdbeta, bar + 0.1, vipmean + sdbeta, lwd=1)
legend("topright",legend = groupnames2, pch = 16, col = unique(color2))
dev.off()
pdf((PDF4),width=10,height=10)
biplot.color(G,H,cex=c(0.8,0.6),col=c(color2),main = paste("PLSDA - ",name,"_",groupnames[i],"_",groupnames[j], sep="")
,xlabMY="PC1",ylabMY="PC2", pch=marks2,lwd=0.00000001,arrow.len=0.05)
legend("topleft",legend = groupnames2, pch = unique(marks2), col = unique(color2),cex=0.6)
biplot.color(G,matrixH[1:top,],cex=c(0.8,0.6),col=c(color2),main = paste("PLSDA - ",name,"_",groupnames[i],"_",groupnames[j],sep="")
,xlabMY="PC1",ylabMY="PC2", pch=marks2,lwd=0.00000001,arrow.len=0.05)
legend("topleft",legend = groupnames2, pch = unique(marks2), col = unique(color2),cex=0.6)
biplot.color_transp(G,H,cex=c(0.8,0.6),col=c(color2),main = paste("PLSDA - ",name,"_",groupnames[i],"_",groupnames[j],sep="")
,xlabMY="PC1",ylabMY="PC2", pch=marks2,lwd=0.00000001,arrow.len=0.05)
legend("topleft",legend = groupnames2, pch = unique(marks2), col = unique(color2),cex=0.6)
gr=as.factor(groups2)
dataEllipse(G[,1],G[,2],gr,group.labels=groupnames2, levels=0.75, plot.points=TRUE, center.cex=0.2, col=unique(color2),
pch=unique(marks2),center.pch=3,main=paste("PLSDA - ",name,"_",groupnames[i],"_",groupnames[j],sep=""),xlab="PC1",ylab="PC2")
legend("bottomright",legend = groupnames2, pch = unique(marks2), col = unique(color2), cex=0.6)
dataEllipse(G[,1],G[,2],gr,group.labels=groupnames2, levels=0.95, plot.points=TRUE, center.cex=0.2, col=unique(color2),
pch=unique(marks2),center.pch=3,main=paste("PLSDA - ",name,"_",groupnames[i],"_",groupnames[j],sep=""),
xlab="PC1",ylab="PC2")
legend("bottomright",legend = groupnames2, pch = unique(marks2), col = unique(color2), cex=0.6)
dev.off()
}
}
setwd(dirout)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.