README.md

hyQSAR R Package

hyQSAR is an R package that can be used to perform hybrid QSAR modelling by integrating structural properties of chemical compounds and their molecular mechanism-of-action (MOA) information. HyQSAR enables easier interpretation of the mechanistic associations between the exposure properties and their biological effects for a given endpoint of interest. It implements a LASSO-based method to optimise the feature selection from multiple data sources as well as the best model parameters by random split validation. In addition, hyQSAR allows automatic model validation according to the OECD requirements. Multiple visualisations are also provided to help the interpretation of the output.

installing hyQSAR from GitHub

library(devtools)
install_github("angy89/hyQSAR")

example of usage

library(hyQSAR)

#load dataset already preprocessed
data("hyQSAR_dataset")

nLambda = 100
nCores = 45
n.splits = 99

# run traditional QSAR using only MDS
md.tqsar.res = tqsar(type.transformation = "none", X_train,X_test, y_train,y_test,
                     md_list = colnames(X_train),gene_list = c(),n.splits = n.splits,
                     nLambda = nLambda, nCores = nCores)

# run traditional QSAR using only MOA features
ge.tqsar.res = tqsar(type.transformation = "none", X_train = X_ge[train.idx,],
                     X_test = X_ge[test.idx,], y_train,y_test,
                     md_list = c(), gene_list = colnames(X_ge),
                     n.splits = n.splits,nLambda = nLambda, nCores = nCores)

# create stacking regression model by combining MD and MOA models
stack_res = stacking_regression_wrapper(pred_tr = c(md.tqsar.res$pred_tr,ge.tqsar.res$pred_tr),
                                        pred_te = c(md.tqsar.res$pred_te,ge.tqsar.res$pred_te),
                                        y_test = y_test,y_train =y_train)


# Create hybrid QSAR model by combining MDs and MOA features with the ABS-POW transformation

# fit hybrid models by applying the ABS-POW transformation to both MD and MOA features. This function computes 81 models
# for each combination of the alpha and alpha1 values. The model are also validated by computing up-to-date internal and
# external validation measures according to the OECD principles.
abs.alpha.power.res = hqsar(type.transformation.md = "abs.alpha.power",
                            type.transformation.ge = "abs.alpha.power",
                            X_md_train = X_md[train.idx,],X_md_test = X_md[test.idx,],
                            X_ge_train = X_ge[train.idx,],X_ge_test = X_ge[test.idx,],
                            y_train = y[train.idx],y_test = y[test.idx],
                            alpha = c(0.1,0.25,0.5,0.75,1,1.25,1.5,1.75,2),
                            alpha1 = c(0.1,0.25,0.5,0.75,1,1.25,1.5,1.75,2),
                            type.solution="lci", measure="MSE",
                            n.splits = n.splits,nLambda = nLambda, nCores = nCores)

# this function filter the 81 models computed with the hqsar function according to the accepted threshold for the validation
# metrics. Furthermore it store the filtered and unfiltered metric files in the start_path folder. Eventually it store the
# williams plot, the scatter plot of experimental vs predicted values and the csv with the predicted values in the same folder.

#### NB change the start_path variable or create the results folder!
dir.create("results")
abs.alpha.power.res.filtered.metrics=build_predicted_results(res = abs.alpha.power.res,
                                                             type.transformation="abs_alpha_power_res__abs_alpha_power_res",
                                                             start_path = "results/", xlab = "Experimental HSB", ylab="Predicted HSB")

#### NB check the optimal solution and set the idx variable accordingly
View(abs.alpha.power.res.filtered.metrics)
idx = 36

# compute the stacking regression model combining the prediced results of the 81 models
abs.alpha.power.res_stacking_res_list = stacking_regression_wrapper(pred_tr = abs.alpha.power.res$pred_tr,
                                                                    pred_te = abs.alpha.power.res$pred_te,
                                                                    y_test = abs.alpha.power.res$y_test,
                                                                    y_train = abs.alpha.power.res$y_train)

# Plot the RSVA correlation plot of the 36-th model
plot(abs.alpha.power.res$finalModels[[idx]])

# investigate MDs and MOA features in the selected model
coeff = round(as.numeric(unlist(strsplit(as.character(abs.alpha.power.res$Metrics[paste(idx,sep = ""),c("coefficients")]),split=";"))),4)
MDs = unlist(strsplit(as.character(abs.alpha.power.res$Metrics[paste(idx,sep = ""),"MDlist"]),split = ";"))
GEs = unlist(strsplit(as.character(abs.alpha.power.res$Metrics[paste(idx,sep = ""),"GEList"]),split = ";"))
MOD = cbind(c("Intercept",MDs,GEs),c(round(as.numeric(abs.alpha.power.res$Metrics[paste(idx,sep = ""),"Intercept"]),4),coeff))

# williams plot of the 36-th model
plot_wp(abs.alpha.power.res$williams_plots[[idx]]$DTP)

# scatterplot of the experimental versus predicted values of the 36-th model
hyQSAR:::plot_pred_real(pred_train=abs.alpha.power.res$pred_tr[[idx]],
               pred_test=abs.alpha.power.res$pred_te[[idx]],
               train_class = abs.alpha.power.res$y_train,
               test_class = abs.alpha.power.res$y_test,xlab = "predicted",ylab = "real")

# Plot correlation graph of the MDs/MOA features of the model
plot_MD_GE_graph(model_info= abs.alpha.power.res.filtered.metrics[paste(idx,sep = ""),],
                 MDMat= rbind(X_md[train.idx,],X_md[test.idx,]),
                 GeneMat= rbind(X_ge[train.idx,],X_ge[test.idx,]),
                 neg_th = 0.9, pos_th = 0.9)



angy89/hyQSAR documentation built on Sept. 24, 2019, 7:31 a.m.