#' Plot williams Plot
#'
#' This function perform the random split analysis method
#' @param data_to_plot is a list containing williamps plot data created y the function evaluating_model
#' @keywords plot williams plot
#' @export
#' @examples
#'
plot_wp = function(data_to_plot){
plot(x = data_to_plot$lev,y = data_to_plot$res,
col =data_to_plot$col, ylim = c(min(-4.5,data_to_plot$res),max(3.5,data_to_plot$res)),
xlim = c(0,max(max(data_to_plot$lev),data_to_plot$h_star)+0.1), xlab = "Leverage",ylab = "Standardized Residual")
abline(v=data_to_plot$h_star)
abline(h = 3, lty=3)
abline(h = -3, lty=3)
legend(x = "bottomright",legend = c("Train","Test"),fill = c("black","red"),bty = "n")
}
#' A function to compute hat matrix
#'
#' This function computes the hat matrix of regression
#' @param X is the dataset matrix with samples on rows and features on columns.
#' @return hat matrix whose diagonal represents the leverage of the regression
#' @keywords hat matrix, leverages
#' @export
#' @examples
#'
hat_matrix = function(X){
xtx = t(X) %*% X
diag(xtx) = diag(xtx) + runif(n = length(diag(xtx)),min = 0.001,max = 0.002)
ixtx = inv(xtx)
return(X %*% (ixtx %*% t(X)))
}
#' A function to create the williams plot and compute the percentage of applicability domain
#'
#' This function create the williams plot for a specific model and compute the percentage of training and test point falling in the applicability domain
#' @param X_train is the training dataset matrix with samples on rows and features on columns.
#' @param X_test is the test dataset matrix with samples on rows and features on columns.
#' @param y_train is the vector of the training response.
#' @param y_test is the vector of the test response.
#' @param model is a fitted linear model for which we want to evaluate the applicability domain
#' @param beta is a vector of the relevant beta values for the regression model
#' @return ADVal is a vector with the percentage of training and test sample falling in the applicability domain
#' @return DTP is a list containing the data to plot the williams plot. In particular it contain the leverages (lev)
#' the residual (res), the colors of training/test points (col) and the h* value (h_star)
#' @keywords williams plot, applicability domain
#' @export
#' @examples
#'
williams_plot = function(X_train, X_test, y_train, y_test, model,beta){
H_train= hat_matrix (rbind(X_train[,names(beta)],X_test[,names(beta)]))
#H_train= hat_matrix (X_train[,names(beta)])
y_pred_train = predict(model,X_train)
y_pred_test = predict(model,X_test)
residual_train= abs(y_train - y_pred_train)
residual_test= abs(y_test - y_pred_test)
s_residual_train = ((residual_train) - mean(residual_train)) / sd(residual_train)
s_residual_test = (residual_test - mean(residual_test))/ sd(residual_test)
leverage_train = diag(H_train)[rownames(H_train) %in% rownames(X_train)]
leverage_test = diag(H_train)[rownames(H_train) %in% rownames(X_test)]
# idx = which(rownames(H_train) %in% rownames(X_test))
# leverage_train = leverage_train[-idx]
p = length(beta) + 1#features
n = nrow(X_train)#+nrow(X_test)#training compounds
h_star = (3*p)/n
AD_train = 100 * (sum(leverage_train < h_star & abs(s_residual_train)<3) / length(leverage_train))
AD_test = 100 * (sum(leverage_test < h_star & abs(s_residual_test)<3) / length(leverage_test))
lev= c(leverage_train,leverage_test)
res = c(s_residual_train,s_residual_test)
col = c(rep("black",nrow(X_train)),rep("red",nrow(X_test)))
data_to_plot = list(lev=lev,res=res,col=col,h_star=h_star)
return(list(ADVal = c(AD_train,AD_test), DTP = data_to_plot))
}
#' A function to create the williams plot and compute the percentage of applicability domain
#'
#' This function create the williams plot for a specific model and compute the percentage of training and test point falling in the applicability domain
#' @param X_train is the training dataset matrix with samples on rows and features on columns.
#' @param X_test is the test dataset matrix with samples on rows and features on columns.
#' @param y_train is the vector of the training response.
#' @param y_test is the vector of the test response.
#' @param model is a fitted linear model for which we want to evaluate the applicability domain
#' @param beta is a vector of the relevant beta values for the regression model
#' @return ADVal is a vector with the percentage of training and test sample falling in the applicability domain
#' @return DTP is a list containing the data to plot the williams plot. In particular it contain the leverages (lev)
#' the residual (res), the colors of training/test points (col) and the h* value (h_star)
#' @keywords williams plot, applicability domain
#' @export
#' @examples
#'
williams_plot_linear = function(X_train, X_test, y_train, y_test, model){
H_train= hat_matrix (rbind(X_train,X_test))
y_pred_train = predict(model,as.data.frame(X_train))
y_pred_test = predict(model,as.data.frame(X_test))
residual_train= abs(y_train - y_pred_train)
residual_test= abs(y_test - y_pred_test)
s_residual_train = ((residual_train) - mean(residual_train)) / sd(residual_train)
s_residual_test = (residual_test - mean(residual_test))/ sd(residual_test)
leverage_train = diag(H_train)[rownames(H_train) %in% rownames(X_train)]
leverage_test = diag(H_train)[rownames(H_train) %in% rownames(X_test)]
p = ncol(X_train) + 1#features
n = nrow(X_train)#+nrow(X_test)#training compounds
h_star = (3*p)/n
AD_train = 100 * (sum(leverage_train < h_star & abs(s_residual_train)<3) / length(leverage_train))
AD_test = 100 * (sum(leverage_test < h_star & abs(s_residual_test)<3) / length(leverage_test))
lev= c(leverage_train,leverage_test)
res = c(s_residual_train,s_residual_test)
col = c(rep("black",nrow(X_train)),rep("red",nrow(X_test)))
data_to_plot = list(lev=lev,res=res,col=col,h_star=h_star)
return(list(ADVal = c(AD_train,AD_test), DTP = data_to_plot))
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.