#' Function to compute the Q2, CCC, Q2F1, Q2F2, Q2F3, and rm2 metrics
#'
#' This function computes the mean Q2, CCC, Q2F1, Q2F2, Q2F3, and rm2 values for a certain number of random split repetition
#' @import glmnet
#' @param y is the reponse vector of the training set
#' @param X is the matrix of the dataset with samples on the rows and features on the column
#' @param train.prop is percentage of point to be used as training
#' @param lambda is the shrinking parameter of the lasso model
#' @param nPerm is the number of permutation to perform
#' @return a vector of numeric values representing the mean Q2, CCC, Q2F1, Q2F2, Q2F3, and rm2
#' @keywords Q2, CCC, Q2F1, Q2F2, Q2F3, and rm2
#' @export
#' @examples
#'
validation_metrics = function(y,X,train.prop=0.9, nPerm = 100,lambda){
n = length(y)
ntrain <- ceiling(train.prop * n)
ntest = n-ntrain
CVmse <- Q2 <- Q2F1 <- Q2F2 <- Q2F3 <- CCC <- c()
print("External Measures computation")
pb = txtProgressBar(min=1,max=nPerm,style=3)
for(i in 1:nPerm){
idx_train <- sample(1:n, size = ntrain, replace = FALSE)
idx_test = 1:n
idx_test = idx_test[-idx_train]
X_train_train = X[idx_train,]
X_train_test = X[-idx_train,]
train_class = y[idx_train]
#model with intercept
fit = glmnet(x = X_train_train,y = train_class,lambda = lambda,intercept = TRUE)
#prediction with intercept
yhat = predict(fit,X_train_test)
RSS <- sum( { y[-idx_train] - yhat }^2 )
TSS <- sum( { y[-idx_train] - mean(y) }^2 )
CVmse = c(CVmse,RSS / ntest)
Q2 = c(Q2,1 - RSS / TSS)
Q2F1 = c(Q2F1,Q2F1_func(observed_train = y[idx_train],observed_test = y[-idx_train],predicted_test = yhat))
Q2F2 = c(Q2F2,Q2F2_func(observed_train = y[idx_train],observed_test = y[-idx_train],predicted_test = yhat))
Q2F3 = c(Q2F3,Q2F3_func(observed_train = y[idx_train],observed_test = y[-idx_train],predicted_test = yhat))
CCC = c(CCC,CCC_func(observed_train = y[idx_train],observed_test = y[-idx_train],predicted_test = yhat))
setTxtProgressBar(pb,i)
}
close(pb)
Q2 = mean(Q2)
CCC = mean(CCC)
Q2F1 = mean(Q2F1)
Q2F2 = mean(Q2F2)
Q2F3 = mean(Q2F3)
return(c(Q2,CCC,Q2F1,Q2F2,Q2F3,0))
}
#' Function to compute the Q2, CCC, Q2F1, Q2F2, Q2F3, and rm2 metrics
#'
#' This function computes the mean Q2, CCC, Q2F1, Q2F2, Q2F3, and rm2 values for a certain number of random split repetition
#' @param y is the reponse vector of the training set
#' @param X is the matrix of the dataset with samples on the rows and features on the column
#' @param train.prop is percentage of point to be used as training
#' @param nPerm is the number of permutation to perform
#' @return a vector of numeric values representing the mean Q2, CCC, Q2F1, Q2F2, Q2F3, and rm2
#' @keywords Q2, CCC, Q2F1, Q2F2, Q2F3, and rm2
#' @export
#' @examples
#'
validation_metrics_linear = function(y,X,train.prop=0.9, nPerm = 100){
n = length(y)
ntrain <- ceiling(train.prop * n)
ntest = n-ntrain
CVmse <- Q2 <- Q2F1 <- Q2F2 <- Q2F3 <- CCC <- c()
print("External Measures computation")
pb = txtProgressBar(min=1,max=nPerm,style=3)
for(i in 1:nPerm){
idx_train <- sample(1:n, size = ntrain, replace = FALSE)
idx_test = 1:n
idx_test = idx_test[-idx_train]
X_train_train = X[idx_train,]
X_train_test = X[-idx_train,]
train_class = y[idx_train]
#model with intercept
fit = lm(train_class~., data = as.data.frame(X_train_train ))
#prediction with intercept
yhat = predict(fit,as.data.frame(X_train_test))
RSS <- sum( { y[-idx_train] - yhat }^2 )
TSS <- sum( { y[-idx_train] - mean(y) }^2 )
CVmse = c(CVmse,RSS / ntest)
Q2 = c(Q2,1 - RSS / TSS)
Q2F1 = c(Q2F1,Q2F1_func(observed_train = y[idx_train],observed_test = y[-idx_train],predicted_test = yhat))
Q2F2 = c(Q2F2,Q2F2_func(observed_train = y[idx_train],observed_test = y[-idx_train],predicted_test = yhat))
Q2F3 = c(Q2F3,Q2F3_func(observed_train = y[idx_train],observed_test = y[-idx_train],predicted_test = yhat))
CCC = c(CCC,CCC_func(observed_train = y[idx_train],observed_test = y[-idx_train],predicted_test = yhat))
setTxtProgressBar(pb,i)
}
close(pb)
Q2 = mean(Q2)
CCC = mean(CCC)
Q2F1 = mean(Q2F1)
Q2F2 = mean(Q2F2)
Q2F3 = mean(Q2F3)
return(c(Q2,CCC,Q2F1,Q2F2,Q2F3,0))
}
# compute r2 and delta_r2 metrics
ojha_validation_metrics = function(X_train, X_test, y_train, y_test,lambda){
#model with intercept
print(" fit glmnet with intercept")
fit = glmnet(x = X_train,y = y_train,lambda = lambda,intercept = TRUE,nlambda = 10)
#model without intercept
print(" fit glmnet with no intercept")
fit0 = glmnet(x = X_train,y = y_train,lambda = lambda,intercept = FALSE,nlambda = 10)
#prediction with intercept
yhat = predict(fit,X_test)
#prediction without intercept
yhat0 = predict(fit0,X_test)
print("computed prediciton")
r_2 = R2_func(observed = y_test,predicted = yhat)
r_20 = R2_func(observed = y_test,predicted = yhat0)
if((r_2 - r_20) == 0){
rm2 = r_2
}else{
rm2 = r_2 * (1 - sqrt(abs(r_2 - r_20)))
}
r_2p = summary(lm(y_test~yhat))$adj.r.squared
r_20p = summary(lm(y_test~yhat0))$adj.r.squared
print("computed summary")
if((r_2 - r_20) == 0){
rm2p = r_2p
}else{
rm2p = r_2p * (1 - sqrt(abs(r_2p - r_20p)))
}
dr2m = abs(rm2-rm2p)
print("end")
return(c(rm2=rm2,dr2m=dr2m))
}
# compute r2 and delta_r2 metrics
ojha_validation_metrics_linear_model = function(X_train, X_test, y_train, y_test){
#model with intercept
fit = lm(formula = y_train~.,data = as.data.frame(X_train))
#model without intercept
fit0 = lm(formula = y_train~.-1,data = as.data.frame(X_train))
#prediction with intercept
yhat = predict(fit,as.data.frame(X_test))
#prediction without intercept
yhat0 = predict(fit0,as.data.frame(X_test))
r_2 = R2_func(observed = y_test,predicted = yhat)
r_20 = R2_func(observed = y_test,predicted = yhat0)
if((r_2 - r_20) == 0){
rm2 = r_2
}else{
rm2 = r_2 * (1 - sqrt(abs(r_2 - r_20)))
}
r_2p = summary(lm(y_test~yhat))$adj.r.squared
r_20p = summary(lm(y_test~yhat0))$adj.r.squared
if((r_2 - r_20) == 0){
rm2p = r_2p
}else{
rm2p = r_2p * (1 - sqrt(abs(r_2p - r_20p)))
}
dr2m = abs(rm2-rm2p)
return(c(rm2=rm2,dr2m=dr2m))
}
# compute thropsha metrics
tropsha_metrics = function(observed, predicted){
### data for tropsha criterias
k = sum(observed * predicted)/sum(predicted^2) # sum(observed * predicted)/sum(predicted^2)
k1 = sum(observed * predicted)/sum(observed^2) # sum(observed * predicted)/sum(observed^2)
yr0 = k * predicted # regression line through the origin defined by yr0 = k * predicted
yr0_ = k1 * observed # regression line through the origin defined by yr0_ = k1 * observed
low = min(min(predicted), min(observed))
up = max(max(predicted),max(observed))
plot(predicted, observed,ylim = c(low,up),xlim=c(low,up))
abline(v=0, lty=3)
abline(h=0, lty=3)
lines(predicted, yr0)
if(sd(predicted)>0.01){
abline(lm(observed~predicted), col="red")
}else{
abline(v = mean(predicted), col = "red")
}
plot(observed, predicted, ylim = c(low,up),xlim=c(low,up))
abline(v=0, lty=3)
abline(h=0, lty=3)
lines(observed, yr0_)
abline(lm(predicted~observed), col="red")
R20 = 1 - ( (sum(predicted-yr0)^2) / (sum((predicted-mean(predicted))^2)) )
R201 = 1 - ( (sum(observed-yr0_)^2) / (sum((observed-mean(observed))^2)) )
R = (sum((observed - mean(observed))*(predicted-mean(predicted))))/(sqrt((sum((observed-mean(observed))^2))*(sum((predicted-mean(predicted))^2))))
trm1 = ((R^2 - R20)/R^2)
trm2 = ((R^2 - R201)/R^2)
trm3 = abs(R^2 - R201)
return(c(trm1 = trm1, k = k, trm2 = trm2, k1=k1, trm3 = trm3))
}
#' this function compute slope a and intercept b for the regression line
#' yr = ay + b for the Golbraikh and Tropsha criteria, from Beware of q2!
#' Alexander Golbraikh, Alexander Tropsha (2002)
#' @param predicted vector of predicted values
#' @param observed vector of observed values
#' @return a vector conntaining the slope and intercept values
compute_slope_and_intercept = function(predicted, observed){
num = sum((predicted - mean(predicted)) * (observed - mean(observed)))
den = sum(predicted - mean(predicted))^2
a = num/den
b = mean(observed) - a * mean(predicted)
return(c(slope = a, intercept = b))
}
#' Function to compute the scrambled R2 and Q2
#'
#' This function computes the mean R2 and Q2 values for a certain number of repetition of the scrambling test
#' @param y is the reponse vector of the training set
#' @param X is the matrix of the dataset with samples on the rows and features on the column
#' @param train.prop is percentage of point to be used as training
#' @param lambda is the shrinking parameter of the lasso model
#' @param nPerm is the number of permutation to perform
#' @return a vector of numeric values representing the mean R2 and Q2 for the scrambling test
#' @keywords R2, Q2, scrambling test
#' @export
#' @examples
#'
random_split_Y_scramble = function(y,X,train.prop=0.9, nPerm = 100,lambda){
n = length(y)
ntrain <- ceiling(train.prop * n)
ntest = n-ntrain
R2 <- Q2 <- c()
print("Y scambling")
pb = txtProgressBar(min=1,max=nPerm,style=3)
for(i in 1:nPerm){
y = sample(y)
fit0 = glmnet(x = X,y = y,lambda = lambda,intercept = TRUE)
yhat0 = predict(fit0,X)
RSS <- sum( { y - yhat0 }^2 )
TSS <- sum( { y - mean(y) }^2 )
R2 = c(R2,1-(RSS/TSS))
# idx_test <- sample(5:44, size = 4, replace = FALSE)
# idx_train = 1:n
# idx_train = idx_train[-idx_test]
idx_train <- sample(1:n, size = ntrain, replace = FALSE)
idx_test = 1:n
idx_test = idx_test[-idx_train]
X_train_train = X[idx_train,]
X_train_test = X[-idx_train,]
train_class = y[idx_train]
fit = glmnet(x = X_train_train,y = train_class,lambda = lambda,intercept = TRUE)
yhat = predict(fit,X_train_test)
RSS <- sum( { y[-idx_train] - yhat }^2 )
TSS <- sum( { y[-idx_train] - mean(y) }^2 )
Q2 = c(Q2,1 - RSS / TSS)
setTxtProgressBar(pb,i)
}
close(pb)
Q2 = mean(Q2)
R2 = mean(R2)
return(c(R2,Q2))
}
#' Function to compute the scrambled MSE
#'
#' This function computes the mean MSE value for a certain number of repetition of the scrambling test
#' @import utils
#' @import glmnet
#' @import stats
#' @param y is the reponse vector of the training set
#' @param X is the matrix of the dataset with samples on the rows and features on the column
#' @param train.prop is percentage of point to be used as training
#' @param lambda is the shrinking parameter of the lasso model
#' @param nPerm is the number of permutation to perform
#' @return numeric value representing the mean MSE for the scrambling test
#' @keywords MSE scrambling test
#' @export
#' @examples
#'
Y_scrambled_mse = function(y,X,train.prop=0.9, nPerm = 100,lambda){
n = length(y)
ntrain <- ceiling(train.prop * n)
ntest = n-ntrain
MSE <- c()
print("Y scambling")
pb = txtProgressBar(min=1,max=nPerm,style=3)
for(i in 1:nPerm){
y = sample(y)
idx_test <- sample(1:n, size = ntest, replace = FALSE)
idx_train = 1:n
idx_train = idx_train[-idx_test]
X_train_train = X[idx_train,]
X_train_test = X[-idx_train,]
train_class = y[idx_train]
fit = glmnet(x = X_train_train,y = train_class,lambda = lambda,intercept = TRUE)
yhat = predict(fit,X_train_test)
RSS <- sum( { y[-idx_train] - yhat }^2 )
MSE = c(MSE,RSS / ntest)
setTxtProgressBar(pb,i)
}
close(pb)
MSE = mean(MSE)
return(MSE)
}
#' Q2F1 function
#'
#' This function computes the Q2F1 metric between observed and predicted value
#' @param observed_train is the vector of the observed reponses of the training set
#' @param observed_test is the vector of the observed reponses of the test set
#' @param predicted_test is the vector of the predicted reponses on the test set
#' @return numeric value representing the Q2F1 metric
#' @keywords Q2F1
#' @export
#' @examples
#'
Q2F1_func = function(observed_train, observed_test, predicted_test){
SSRes = sum((observed_test-predicted_test)^2)
SStot = sum((observed_test-mean(observed_train))^2)
r2 = 1 - (SSRes/SStot)
#if(r2<0){r2=abs(r2)}
return(r2)
}
#' Q2F2 function
#'
#' This function computes the Q2F2 metric between observed and predicted value
#' @param observed_train is the vector of the observed reponses of the training set
#' @param observed_test is the vector of the observed reponses of the test set
#' @param predicted_test is the vector of the predicted reponses on the test set
#' @return numeric value representing the Q2F2 metric
#' @keywords Q2F2
#' @export
#' @examples
#'
Q2F2_func = function(observed_train, observed_test, predicted_test){
SSRes = sum((observed_test-predicted_test)^2)
SStot = sum((observed_test-mean(observed_test))^2)
r2 = 1 - (SSRes/SStot)
#if(r2<0){r2=abs(r2)}
return(r2)
}
#' Q2F3 function
#'
#' This function computes the Q2F3 metric between observed and predicted value
#' @param observed_train is the vector of the observed reponses of the training set
#' @param observed_test is the vector of the observed reponses of the test set
#' @param predicted_test is the vector of the predicted reponses on the test set
#' @return numeric value representing the Q2F3 metric
#' @keywords Q2F3
#' @export
#' @examples
#'
Q2F3_func = function(observed_train, observed_test, predicted_test){
SSRes = sum((observed_test-predicted_test)^2)
SSRes = SSRes/length(observed_test)
SStot = sum((observed_train-mean(observed_train))^2)
SStot = SStot/length(observed_train)
r2 = 1 - (SSRes/SStot)
return(r2)
}
#' CCC function
#'
#' This function computes the CCC metric between observed and predicted value
#' @param observed_train is the vector of the observed reponses of the training set
#' @param observed_test is the vector of the observed reponses of the test set
#' @param predicted_test is the vector of the predicted reponses on the test set
#' @return numeric value representing the CCC metric
#' @keywords CCC
#' @export
#' @examples
#'
CCC_func = function(observed_train, observed_test, predicted_test){
SSRes = sum((observed_test-mean(observed_test))*(predicted_test-mean(predicted_test))) * 2
SStot = sum((observed_test-mean(observed_test))^2) +
sum((predicted_test-mean(predicted_test))^2) +
length(predicted_test) * (mean(observed_test)-mean(predicted_test))^2
r2 = SSRes/SStot
#if(r2<0){r2=abs(r2)}
return(r2)
}
#' R2 function
#'
#' This function computes the R2 metric between observed and predicted value
#' @param observed is the vector of the observed reponses
#' @param predicted is the vector of the predicted reponses
#' @return numeric value representing the R2 metric
#' @keywords R2
#' @export
#' @examples
#'
R2_func = function(observed, predicted){
SSRes = sum((predicted-observed)^2)
SStot = sum((observed-mean(observed))^2)
r2 = 1 - (SSRes/SStot)
#if(r2<0){r2=abs(r2)}
return(r2)
}
R2_func_2 = function(observed, predicted){
tss <- sum((predicted - mean(observed)) ^ 2) ## total sum of squares
regss <- sum((observed - mean(observed)) ^ 2) ## regression sum of squares
return(regss / tss)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.