cmf_krr_train_mem: To build model in memory

Description Usage Arguments Examples

Description

To build model in memory

Usage

1
cmf_krr_train_mem(y, kernels, alpha_grid_search = TRUE, gamma_grid_search = FALSE, conic_kernel_combination = FALSE, optimize_h = FALSE, mfields = c("q", "vdw", "logp", "abra", "abrb"), set_b_0 = FALSE, print_interm_icv = TRUE, plot_interm_icv = TRUE, print_final_icv = TRUE, plot_final_icv = TRUE, ...)

Arguments

y
kernels
alpha_grid_search
gamma_grid_search
conic_kernel_combination
optimize_h
mfields
set_b_0
print_interm_icv
plot_interm_icv
print_final_icv
plot_final_icv
...

Examples

  1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
##---- Should be DIRECTLY executable !! ----
##-- ==>  Define data, use random,
##--	or do  help(data=index)  for the standard data sets.

## The function is currently defined as
function (y, kernels, alpha_grid_search = TRUE, gamma_grid_search = FALSE, 
    conic_kernel_combination = FALSE, optimize_h = FALSE, mfields = c("q", 
        "vdw", "logp", "abra", "abrb"), set_b_0 = FALSE, print_interm_icv = TRUE, 
    plot_interm_icv = TRUE, print_final_icv = TRUE, plot_final_icv = TRUE, 
    ...) 
{
    var_y <- var(y)
    ncomp <- length(y)
    alphas <- kernels$alphas
    nalphas <- length(alphas)
    nfields <- length(mfields)
    Q2_best_of_best <- -1000
    model <- list()
    fr <- function(par_list) {
        try_current_hyper_params <- function() {
            m <- build_krr_model(Km, y, gamma, set_b_0)
            if (is.null(m)) 
                return()
            y_pred <- Km %*% m$a + m$b
            regr <- regr_param(y_pred, y)
            RMSE <- regr$RMSE
            R2 <- regr$R2
            cv <- cv_krr(10, Km, y, gamma)
            RMSEcv <- cv$RMSE
            Q2 <- cv$R2
            y_pred_cv <- cv$y_pred_cv
            minQ2R2 <- min(Q2, R2)
            if (minQ2R2 > minQ2R2_best) {
                minQ2R2_best <<- minQ2R2
                RMSE_best <<- RMSE
                R2_best <<- R2
                RMSEcv_best <<- RMSEcv
                Q2_best <<- Q2
                if (alpha_grid_search) {
                  ialpha_best <<- ialpha
                }
                alpha_best <<- alpha
                gamma_best <<- gamma
                a_best <<- m$a
                b_best <<- m$b
                y_pred_best <<- y_pred
                y_pred_cv_best <<- y_pred_cv
            }
        }
        R2_best <- -1000
        RMSE_best <- -1
        Q2_best <- -1000
        RMSEcv_best <- -1
        minQ2R2_best <- -1000
        alpha_best <- -1
        gamma_best <- -1
        a_best <- NULL
        b_best <- NULL
        y_pred_best <- double()
        y_pred_cv_best <- double()
        h <- list()
        pos <- 1
        if (optimize_h) {
            if (conic_kernel_combination) {
                for (f in 1:nfields) h[[mfields[f]]] <- abs(par_list[f])
            }
            else {
                for (f in 1:nfields) h[[mfields[f]]] <- par_list[f]
            }
            pos <- pos + nfields
            if (!alpha_grid_search) {
                alpha <- par_list[pos]
                pos <- pos + 1
            }
            if (!gamma_grid_search) 
                gamma <- par_list[pos]
        }
        else {
            for (f in 1:nfields) h[[mfields[f]]] <- 1
            if (!alpha_grid_search) {
                alpha <- par_list[pos]
                pos <- pos + 1
            }
            if (!gamma_grid_search) 
                gamma <- par_list[pos]
        }
        if (alpha_grid_search) {
            for (ialpha in 1:length(alphas)) {
                alpha <- alphas[[ialpha]]
                Km <<- matrix(0, nrow = ncomp, ncol = ncomp)
                for (f in 1:nfields) {
                  Km <<- Km + h[[mfields[f]]] * kernels[[mfields[f]]][[ialpha]]
                }
                if (gamma_grid_search) {
                  for (gamma in gamma_list) {
                    try_current_hyper_params()
                  }
                }
                else {
                  try_current_hyper_params()
                }
            }
            alpha_best <- alphas[ialpha_best]
        }
        else {
            Km <<- cmf_calc_combined_kernels_1alpha(kernels, 
                h, alpha, alphas)
            if (gamma_grid_search) {
                for (gamma in gamma_list) {
                  try_current_hyper_params()
                }
            }
            else {
                try_current_hyper_params()
            }
        }
        if (Q2_best > Q2_best_of_best) {
            Q2_best_of_best <<- Q2_best
            if (print_interm_icv) {
                for (f in 1:nfields) cat(sprintf("h_%s=%g ", 
                  mfields[f], h[[mfields[f]]]))
                cat(sprintf("\n"))
                cat(sprintf("best: alpha=%g gamma=%g RMSE=%g R2=%g RMSEcv=%g Q2=%g \n", 
                  alpha_best, gamma_best, RMSE_best, R2_best, 
                  RMSEcv_best, Q2_best))
                flush.console()
            }
            if (plot_interm_icv) {
                cinf_plotxy(y_pred_cv_best, y, xlab = "Predicted", 
                  ylab = "Experiment", main = "Scatter Plot for Cross-Validation (Internal)")
                abline(coef = c(0, 1))
            }
            model$gamma <<- gamma_best
            for (f in 1:nfields) {
                model$h[[mfields[f]]] <<- h[[mfields[f]]]
                model$alpha[[mfields[f]]] <<- alpha_best
                if (alpha_best < alphas[1]) 
                  model$alpha[[mfields[f]]] <<- alphas[1]
                if (alpha_best > alphas[nalphas]) 
                  model$alpha[[mfields[f]]] <<- alphas[nalphas]
            }
            model$R2 <<- R2_best
            model$RMSE <<- RMSE_best
            model$y_pred <<- y_pred_best
            model$y_exp <<- y
            model$Q2 <<- Q2_best
            model$RMSEcv <<- RMSEcv_best
            model$y_pred_cv <<- y_pred_cv_best
            model$a <<- a_best
            model$b <<- b_best
        }
        RMSEcv_best
    }
    par_list <- list()
    if (optimize_h) 
        par_list <- c(par_list, rep(1, nfields))
    if (!alpha_grid_search) 
        par_list <- c(par_list, 0.25)
    if (!gamma_grid_search) 
        par_list <- c(par_list, 5)
    npars <- length(par_list)
    if (npars > 1) {
        res <- optim(par_list, fr)
    }
    else if (npars == 1) {
        res <- optimize(fr, c(0.01, 20))
    }
    else {
        res <- fr()
    }
    model$set_b_0 <- set_b_0
    if (print_final_icv) {
        for (f in 1:nfields) cat(sprintf("h_%s=%g ", mfields[f], 
            model$h[[mfields[f]]]))
        cat(sprintf("\n"))
        cat(sprintf("final: alpha=%g gamma=%g RMSE=%g R2=%g RMSEcv=%g Q2=%g \n", 
            model$alpha[1], model$gamma, model$RMSE, model$R2, 
            model$RMSEcv, model$Q2))
        flush.console()
    }
    if (plot_final_icv) {
        cinf_plotxy(model$y_pred_cv, y, xlab = "Predicted", ylab = "Experiment", 
            main = "Scatter Plot for Cross-Validation (Internal)")
        abline(coef = c(0, 1))
    }
    model
  }

conmolfields documentation built on May 2, 2019, 4:18 p.m.