Caret_Models_VS_UT <- function(train_genotypes, train_phenotype,test_genotypes, test_phenotype,model="svmRadial",type="Regression",markers=NULL,Kernel="Gaussian",Sparse=FALSE,m=NULL,degree=NULL, nL=NULL,transformation=NULL,sampling="up",repeats=5,method="repeatedcv"){
# for fitting SVMs
#library(DMwR)
# Make the CV list
# Split into training and testing data
# Calculate the GS model using rrBLUP
if(Kernel=="Markers"){
if(type=="Classification"){
train_phenotype[,2] <- as.factor(train_phenotype[,2])
train_phenotype[train_phenotype=="NaN"]<-NA
train_phenotype=droplevels(train_phenotype)
myY_train <- train_phenotype[,2]
myY_train=as.factor(myY_train)
myY_train=droplevels(myY_train)
pheno_test=test_phenotype
pheno_test[,2]<-NA
pheno_train <- c(train=train_phenotype[,2],test=pheno_test[,2])
}else{
if(transformation=="sqrt"){
train_phenotype[,2]=replace(train_phenotype[,2], train_phenotype[,2] < 0, 0)
myY_train <- sqrt(train_phenotype[,2])
pheno_test=test_phenotype
pheno_test[,2]<-NA
pheno_train <- c(train=train_phenotype[,2],test=pheno_test[,2])
}
if(transformation=="log"){
train_phenotype[,2]=replace(train_phenotype[,2], train_phenotype[,2] <= 0, 0.000001)
myY_train <- log(train_phenotype[,2])
pheno_test=test_phenotype
pheno_test[,2]<-NA
pheno_train <- c(train=train_phenotype[,2],test=pheno_test[,2])
}
if(transformation=="boxcox"){
myY_train <- boxcox_t(train_phenotype[,2])
pheno_test=test_phenotype
pheno_test[,2]<-NA
pheno_train <- c(train=train_phenotype[,2],test=pheno_test[,2])
}
if(transformation=="none"){
myY_train <- train_phenotype[,2]
pheno_test=test_phenotype
pheno_test[,2]<-NA
pheno_train <- c(train=train_phenotype[,2],test=pheno_test[,2])
}
}
if(!is.null(markers)){
samp=sample(1:ncol(train_genotypes), markers)
myGD_train <- train_genotypes[,samp]
myGD_test <- test_genotypes[,samp]
}else{
myGD_train <- train_genotypes
myGD_test <- test_genotypes
}
myGD_train2=as.matrix(sapply(train_genotypes, as.numeric))
myGD_test1=as.matrix(sapply(test_genotypes, as.numeric))
if(type=="Classification"){
ctrl=trainControl(method = method, repeats = repeats, savePredictions="none",sampling = sampling)
}else{
ctrl=trainControl(method = method, repeats = repeats, savePredictions="none")
}
svmFit1 <- train(myGD_train2,myY_train,
method = model,
preProcess = c("center", "scale","nzv"),
trControl = ctrl,
tuneLength = 10,
allowParallel=TRUE)
svm.linear_pred1 <- predict(svmFit1, myGD_test1)
prediction=data.frame(pheno_test,pred=svm.linear_pred1)
}
else if(Kernel=="VanRaden"){
#pheno_train <- phenotype
#pheno_train[-fold_indices,2] <- NA
#myY_train <- pheno_train[,2]
#myY_train=droplevels(myY_train)
genotypes=rbind(train=train_genotypes,test=test_genotypes)
VR=GAPIT.kinship.VanRaden(genotypes)
if(type=="Classification"){
train_phenotype[,2] <- as.factor(train_phenotype[,2])
train_phenotype[train_phenotype=="NaN"]<-NA
train_phenotype=droplevels(train_phenotype)
myY_train <- train_phenotype[,2]
myY_train=as.factor(myY_train)
myY_train=droplevels(myY_train)
pheno_test=test_phenotype
pheno_test[,2]<-NA
pheno_train <- c(train=train_phenotype[,2],test=pheno_test[,2])
}else{
if(transformation=="sqrt"){
train_phenotype[,2]=replace(train_phenotype[,2], train_phenotype[,2] < 0, 0)
myY_train <- sqrt(train_phenotype[,2])
pheno_test=test_phenotype
pheno_test[,2]<-NA
pheno_train <- c(train=train_phenotype[,2],test=pheno_test[,2])
}
if(transformation=="log"){
train_phenotype[,2]=replace(train_phenotype[,2], train_phenotype[,2] <= 0, 0.000001)
myY_train <- log(train_phenotype[,2])
pheno_test=test_phenotype
pheno_test[,2]<-NA
pheno_train <- c(train=train_phenotype[,2],test=pheno_test[,2])
}
if(transformation=="boxcox"){
myY_train <- boxcox_t(train_phenotype[,2])
pheno_test=test_phenotype
pheno_test[,2]<-NA
pheno_train <- c(train=train_phenotype[,2],test=pheno_test[,2])
}
if(transformation=="none"){
myY_train <- train_phenotype[,2]
pheno_test=test_phenotype
pheno_test[,2]<-NA
pheno_train <- c(train=train_phenotype[,2],test=pheno_test[,2])
}
}
myGD_train2=VR[c(1:length(train_phenotype[,2])),]
myGD_test1=VR[-c(1:length(train_phenotype[,2])),]
if(type=="Classification"){
ctrl=trainControl(method = method, repeats = repeats, savePredictions="none",sampling = sampling)
}else{
ctrl=trainControl(method = method, repeats = repeats, savePredictions="none")
}
svmFit1 <- train(myGD_train2,myY_train,
method = model,
preProcess = c("center", "scale","nzv"),
trControl = ctrl,
tuneLength = 10,
allowParallel=TRUE)
svm.linear_pred1 <- predict(svmFit1, myGD_test1)
prediction=data.frame(pheno_test,pred=svm.linear_pred1)
}else if(Kernel=="PC"){
genotypes=rbind(train=train_genotypes,test=test_genotypes)
PCA=prcomp(genotypes)
myPCA=PCA$x
if(type=="Classification"){
train_phenotype[,2] <- as.factor(train_phenotype[,2])
train_phenotype[train_phenotype=="NaN"]<-NA
train_phenotype=droplevels(train_phenotype)
myY_train <- train_phenotype[,2]
myY_train=as.factor(myY_train)
myY_train=droplevels(myY_train)
pheno_test=test_phenotype
pheno_test[,2]<-NA
pheno_train <- c(train=train_phenotype[,2],test=pheno_test[,2])
}else{
if(transformation=="sqrt"){
train_phenotype[,2]=replace(train_phenotype[,2], train_phenotype[,2] < 0, 0)
myY_train <- sqrt(train_phenotype[,2])
pheno_test=test_phenotype
pheno_test[,2]<-NA
pheno_train <- c(train=train_phenotype[,2],test=pheno_test[,2])
}
if(transformation=="log"){
train_phenotype[,2]=replace(train_phenotype[,2], train_phenotype[,2] <= 0, 0.000001)
myY_train <- log(train_phenotype[,2])
pheno_test=test_phenotype
pheno_test[,2]<-NA
pheno_train <- c(train=train_phenotype[,2],test=pheno_test[,2])
}
if(transformation=="boxcox"){
myY_train <- boxcox_t(train_phenotype[,2])
pheno_test=test_phenotype
pheno_test[,2]<-NA
pheno_train <- c(train=train_phenotype[,2],test=pheno_test[,2])
}
if(transformation=="none"){
myY_train <- train_phenotype[,2]
pheno_test=test_phenotype
pheno_test[,2]<-NA
pheno_train <- c(train=train_phenotype[,2],test=pheno_test[,2])
}
}
myGD_train2=myPCA[c(1:length(train_phenotype[,2])),]
myGD_test1=myPCA[-c(1:length(train_phenotype[,2])),]
if(type=="Classification"){
ctrl=trainControl(method = method, repeats = repeats, savePredictions="none",sampling = sampling)
}else{
ctrl=trainControl(method = method, repeats = repeats, savePredictions="none")
}
svmFit1 <- train(myGD_train2,myY_train,
method = model,
preProcess = c("center", "scale","nzv"),
trControl = ctrl,
tuneLength = 10,
allowParallel=TRUE)
svm.linear_pred1 <- predict(svmFit1, myGD_test1)
prediction=data.frame(pheno_test,pred=svm.linear_pred1)
}
else if(Kernel=="Zhang"){
#pheno_train <- phenotype
#pheno_train[-fold_indices,2] <- NA
#myY_train <- pheno_train[,2]
#myY_train=droplevels(myY_train)
genotypes=rbind(train=train_genotypes,test=test_genotypes)
ZZ=GAPIT.kinship.Zhang(genotypes)
if(type=="Classification"){
train_phenotype[,2] <- as.factor(train_phenotype[,2])
train_phenotype[train_phenotype=="NaN"]<-NA
train_phenotype=droplevels(train_phenotype)
myY_train <- train_phenotype[,2]
myY_train=as.factor(myY_train)
myY_train=droplevels(myY_train)
pheno_test=test_phenotype
pheno_test[,2]<-NA
pheno_train <- c(train=train_phenotype[,2],test=pheno_test[,2])
}else{
if(transformation=="sqrt"){
train_phenotype[,2]=replace(train_phenotype[,2], train_phenotype[,2] < 0, 0)
myY_train <- sqrt(train_phenotype[,2])
pheno_test=test_phenotype
pheno_test[,2]<-NA
pheno_train <- c(train=train_phenotype[,2],test=pheno_test[,2])
}
if(transformation=="log"){
train_phenotype[,2]=replace(train_phenotype[,2], train_phenotype[,2] <= 0, 0.000001)
myY_train <- log(train_phenotype[,2])
pheno_test=test_phenotype
pheno_test[,2]<-NA
pheno_train <- c(train=train_phenotype[,2],test=pheno_test[,2])
}
if(transformation=="boxcox"){
myY_train <- boxcox_t(train_phenotype[,2])
pheno_test=test_phenotype
pheno_test[,2]<-NA
pheno_train <- c(train=train_phenotype[,2],test=pheno_test[,2])
}
if(transformation=="none"){
myY_train <- train_phenotype[,2]
pheno_test=test_phenotype
pheno_test[,2]<-NA
pheno_train <- c(train=train_phenotype[,2],test=pheno_test[,2])
}
}
myGD_train2=ZZ[c(1:length(train_phenotype[,2])),]
myGD_test1=ZZ[-c(1:length(train_phenotype[,2])),]
if(type=="Classification"){
ctrl=trainControl(method = method, repeats = repeats, savePredictions="none",sampling = sampling)
}else{
ctrl=trainControl(method = method, repeats = repeats, savePredictions="none")
}
svmFit1 <- train(myGD_train2,myY_train,
method = model,
preProcess = c("center", "scale","nzv"),
trControl = ctrl,
tuneLength = 10,
allowParallel=TRUE)
svm.linear_pred1 <- predict(svmFit1, myGD_test1)
prediction=data.frame(pheno_test,pred=svm.linear_pred1)
}
else if(Kernel=="Endelman"){
#pheno_train <- phenotype
#pheno_train[-fold_indices,2] <- NA
#myY_train <- pheno_train[,2]
#myY_train=droplevels(myY_train)
genotypes=rbind(train=train_genotypes,test=test_genotypes)
G=A.mat(genotypes)
if(type=="Classification"){
train_phenotype[,2] <- as.factor(train_phenotype[,2])
train_phenotype[train_phenotype=="NaN"]<-NA
train_phenotype=droplevels(train_phenotype)
myY_train <- train_phenotype[,2]
myY_train=as.factor(myY_train)
myY_train=droplevels(myY_train)
pheno_test=test_phenotype
pheno_test[,2]<-NA
pheno_train <- c(train=train_phenotype[,2],test=pheno_test[,2])
}else{
if(transformation=="sqrt"){
train_phenotype[,2]=replace(train_phenotype[,2], train_phenotype[,2] < 0, 0)
myY_train <- sqrt(train_phenotype[,2])
pheno_test=test_phenotype
pheno_test[,2]<-NA
pheno_train <- c(train=train_phenotype[,2],test=pheno_test[,2])
}
if(transformation=="log"){
train_phenotype[,2]=replace(train_phenotype[,2], train_phenotype[,2] <= 0, 0.000001)
myY_train <- log(train_phenotype[,2])
pheno_test=test_phenotype
pheno_test[,2]<-NA
pheno_train <- c(train=train_phenotype[,2],test=pheno_test[,2])
}
if(transformation=="boxcox"){
myY_train <- boxcox_t(train_phenotype[,2])
pheno_test=test_phenotype
pheno_test[,2]<-NA
pheno_train <- c(train=train_phenotype[,2],test=pheno_test[,2])
}
if(transformation=="none"){
myY_train <- train_phenotype[,2]
pheno_test=test_phenotype
pheno_test[,2]<-NA
pheno_train <- c(train=train_phenotype[,2],test=pheno_test[,2])
}
}
myGD_train2=G[c(1:length(train_phenotype[,2])),]
myGD_test1=G[-c(1:length(train_phenotype[,2])),]
if(type=="Classification"){
ctrl=trainControl(method = method, repeats = repeats, savePredictions="none",sampling = sampling)
}else{
ctrl=trainControl(method = method, repeats = repeats, savePredictions="none")
}
svmFit1 <- train(myGD_train2,myY_train,
method = model,
preProcess = c("center", "scale","nzv"),
trControl = ctrl,
tuneLength = 10,
allowParallel=TRUE)
svm.linear_pred1 <- predict(svmFit1, myGD_test1)
prediction=data.frame(pheno_test,pred=svm.linear_pred1)
}
else{
genotypes=rbind(train=train_genotypes,test=test_genotypes)
maf <- calc_maf_apply(genotypes, encoding = c(0, 1, 2))
mono_indices <- which(maf ==0)
if(length(mono_indices)!=0){
genotypes = genotypes[,-mono_indices]
}
if(Sparse==TRUE){
X=as.matrix(genotypes)
X=apply(genotypes,2,as.numeric)
#sum(rowSums(is.na(X)))
XS<-scale(X,center=TRUE,scale=TRUE)
#sum(rowSums(is.na(XS)))
library(caret)
nzv <- nearZeroVar(XS)
XSZV <- XS[, -nzv]
#sum(rowSums(is.na(XSZV)))
X=XSZV
K=Sparse_Kernel_Construction(m=m,X=X,name=Kernel, degree=degree,nL=nL)
}else{
X=as.matrix(genotypes)
X=apply(genotypes,2,as.numeric)
#sum(rowSums(is.na(X)))
XS<-scale(X,center=TRUE,scale=TRUE)
#sum(rowSums(is.na(XS)))
library(caret)
nzv <- nearZeroVar(XS)
XSZV <- XS[, -nzv]
#sum(rowSums(is.na(XSZV)))
X=XSZV
K=Kernel_computation(X=X,name=Kernel,degree=degree, nL=nL)
}
if(type=="Classification"){
train_phenotype[,2] <- as.factor(train_phenotype[,2])
train_phenotype[train_phenotype=="NaN"]<-NA
train_phenotype=droplevels(train_phenotype)
myY_train <- train_phenotype[,2]
myY_train=as.factor(myY_train)
myY_train=droplevels(myY_train)
pheno_test=test_phenotype
pheno_test[,2]<-NA
pheno_train <- c(train=train_phenotype[,2],test=pheno_test[,2])
}else{
if(transformation=="sqrt"){
train_phenotype[,2]=replace(train_phenotype[,2], train_phenotype[,2] < 0, 0)
myY_train <- sqrt(train_phenotype[,2])
pheno_test=test_phenotype
pheno_test[,2]<-NA
pheno_train <- c(train=train_phenotype[,2],test=pheno_test[,2])
}
if(transformation=="log"){
train_phenotype[,2]=replace(train_phenotype[,2], train_phenotype[,2] <= 0, 0.000001)
myY_train <- log(train_phenotype[,2])
pheno_test=test_phenotype
pheno_test[,2]<-NA
pheno_train <- c(train=train_phenotype[,2],test=pheno_test[,2])
}
if(transformation=="boxcox"){
myY_train <- boxcox_t(train_phenotype[,2])
pheno_test=test_phenotype
pheno_test[,2]<-NA
pheno_train <- c(train=train_phenotype[,2],test=pheno_test[,2])
}
if(transformation=="none"){
myY_train <- train_phenotype[,2]
pheno_test=test_phenotype
pheno_test[,2]<-NA
pheno_train <- c(train=train_phenotype[,2],test=pheno_test[,2])
}
}
myGD_train2=K[c(1:length(train_phenotype[,2])),]
myGD_test1=K[-c(1:length(train_phenotype[,2])),]
if(type=="Classification"){
ctrl=trainControl(method = method, repeats = repeats, savePredictions="none",sampling = sampling)
}else{
ctrl=trainControl(method = method, repeats = repeats, savePredictions="none")
}
svmFit1 <- train(myGD_train2,myY_train,
method = model,
preProcess = c("center", "scale","nzv"),
trControl = ctrl,
tuneLength = 10,
allowParallel=TRUE)
svm.linear_pred1 <- predict(svmFit1, myGD_test1)
prediction=data.frame(pheno_test,pred=svm.linear_pred1)
}
if(type=="Classification"){
results_ALL=list(Predictions=prediction)
}else{
results_ALL=list(Predictions=prediction)
}
return(results_ALL)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.