validate_data_matriz <- function(data){
names(data) <- tolower(names(data))
stopifnot("rut" %in% names(data))
stopifnot("mes_matriz" %in% names(data))
stopifnot(ncol(data) == 2)
data <- data %>%
distinct(rut, mes_matriz) %>%
mutate_all(as.numeric) %>%
arrange(mes_matriz, rut) %>%
mutate(id = row_number()) %>%
select(everything(), id)
data
}
get_matriz_aux <- function(drut, periodos) {
max_p_1 <- max(periodos) + -1
# x fecha observacion
# y fecha sbif
# > --- DIFF positivo --- > (periodo observacion) ----- DIFF negativo --->
dmat <- left_join(
add_year_month2(drut),
tbl(drut$src$con, "matriz_modelos") %>% add_year_month2(),
by = "rut") %>%
mutate(DIFF = 12 * (YEAR.x - YEAR.y) + (MONTH.x - MONTH.y)) %>%
select(ID, RUT, ends_with("x"), ends_with("y"), DIFF, everything()) %>%
filter(DIFF >= 0, DIFF <= max_p_1)
name <- "mat_aux"
remove_if_exists(drut$src$con, name)
qs <- db_sql_render(dmat$src$con, dmat) %>%
as.character() %>%
str_replace("FROM", sprintf("INTO %s FROM", name))
# cat(qs)
message("computing ", name)
dbSendQuery(dmat$src$con, qs)
dmat <- tbl(dmat$src$con, name)
dmat
}
get_matriz_join_dlm <- function(dmat, periodos, tblname, calculate_min_mes_matriz = FALSE) {
get_matrix_summarize_by_per_prod <- function(dmat, producto = list(1, 2, 20, 21, 22, 23, 24, 70), periodo = 6, lbl = "con") {
message("PERIODO ", periodo, " PRODUCTO ", paste(producto, collapse = "-"))
prods <- unlist(producto)
daux <- dmat %>%
filter(between(DIFF, 0, periodo - 1), producto_riesgo %in% prods) %>% # count(DIFF) %>% collect() %>% View()
select(id, operacion, DIFF, sdo = saldo, mra = mora_mes)
d1 <- daux %>%
select(id, mra, sdo) %>%
group_by(id) %>%
summarise_all(.funs = funs(mean = mean, max = max))
d2 <- daux %>%
group_by(id) %>%
summarise(
ant = max(DIFF) + 1,
ops = n_distinct(operacion)
)
d3 <- daux %>% filter(mra > 30) %>% group_by(id) %>% summarise(mra_rec_30 = min(DIFF))
d4 <- daux %>% filter(mra > 60) %>% group_by(id) %>% summarise(mra_rec_60 = min(DIFF))
d5 <- daux %>% filter(mra > 90) %>% group_by(id) %>% summarise(mra_rec_90 = min(DIFF))
d6 <- daux %>% filter(mra >120) %>% group_by(id) %>% summarise(mra_rec_120 = min(DIFF))
daux <- list(d1, d2, d3, d4, d5, d6) %>% reduce(left_join, by = "id")
vars <- daux$ops$args$vars$alias
varsnew <- paste(vars, str_pad(lbl, 2, pad = "0"), str_pad(periodo, 2, pad = "0"), sep = "_")
varsnew[1] <- vars[1]
varsnew <- as.list(set_names(vars, varsnew))
daux <- daux %>% select_(.dots = varsnew)
daux
}
dinit <- tbl(dmat$src$con, tblname) %>%
left_join(tbl(dmat$src$con, "score_antiguedad_cmr"), by = c("rut", "mes_matriz")) %>%
select(-rut, -mes_matriz)
# show_query(dinit)
if(calculate_min_mes_matriz) {
dinit <- tbl(dmat$src$con, tblname) %>%
group_by(rut) %>%
summarise(aux = 1) %>%
select(-aux) %>%
left_join(tbl(dmat$src$con, "matriz_modelos")) %>%
group_by(rut) %>%
summarise(mes_matriz_min = min(mes_matriz)) %>%
left_join(tbl(dmat$src$con, tblname), .) %>%
select(id, mes_matriz_min) %>%
left_join(dinit, .)
# show_query(dinit)
}
dprod <- data_frame(
producto = list(
con = list(1, 2, 20, 21, 22, 23, 24, 70),
com = list(19, 26, 32, 41),
ren = list(23),
lin = list(2),
aut = list(21),
cns = list(24),
# cru = list(41),
ref = list(22),
hip = list(60),
cuo = list(20)
),
lbl = names(producto)
)
ddlm <- expand.grid(
lbl = pull(dprod, lbl),
periodo = periodos
) %>%
tbl_df() %>%
left_join(dprod) %>%
mutate(dmat = list(dmat)) %>%
select(dmat, producto, periodo, lbl) %>%
pmap(get_matrix_summarize_by_per_prod) %>%
reduce(left_join, by = "id", .init = dinit)
name <- "mat_join_dlm"
remove_if_exists(dmat$src$con, name)
qs <- db_sql_render(ddlm$src$con, ddlm) %>%
as.character() %>%
str_replace("FROM", sprintf("INTO %s FROM", name))
str_length(qs)
message("computing ", name)
dbSendQuery(dmat$src$con, qs)
ddlm <- tbl(dmat$src$con, name)
ddlm
}
get_matriz_tend_by_pers <- function(ddlm, periodos = c(1, 6, 12), tblname) {
vars <- ddlm$ops$vars %>%
str_subset("mean") %>%
str_replace("_[0-9]+$", "") %>%
unique()
miniqs <- expand.grid(v = vars, p1 = periodos, p2 = periodos) %>%
tbl_df() %>%
filter(p1 < p2) %>%
mutate_if(is.factor, as.character) %>%
mutate_at(vars(p1, p2), str_pad, width = 2, pad = 0) %>%
mutate(
a = pmap_chr(., function(v, p1, p2) str_glue("case when {var}_{p2} > 0 then CAST({var}_{p1} AS float)/CAST({var}_{p2} AS float) else NULL end", var = v, p1 = p1, p2 = p2)),
b = pmap_chr(., function(v, p1, p2) str_glue("{var}_tend_{p1}_{p2}", var = v, p1 = p1, p2 = p2)),
miniq = map2_chr(a, b, function(a, b) str_glue("{a} as {b}", a = a, b = b))
) %>%
pull(miniq) %>%
str_c(collapse = ", ")
# miniqs2 <- expand.grid(v = setdiff(vars, str_subset(vars, "MAX")), p = periodos) %>%
# tbl_df() %>%
# mutate_if(is.factor, as.character) %>%
# mutate(p = str_pad(p, width = 2, pad = 0), v = str_replace_all(v, "_MEAN", "")) %>%
# mutate(
# a = map2_chr(v, p, function(v, p) str_glue("case when {var}_MAX_{p} > 0 then CAST({var}_MIN_{p} as float)/CAST({var}_MAX_{p} AS float) else -9 end", var = v, p = p)),
# b = map2_chr(v, p, function(v, p) str_glue("{var}_RATIO_MIN_MAX_{p}", var = v, p = p)),
# miniq = map2_chr(a, b, function(a, b) str_glue("{a} as {b}", a = a, b = b))
# )%>%
# pull(miniq) %>%
# str_c(collapse = ", ")
name <- str_c(tblname, "_variables")
remove_if_exists(ddlm$src$con, name)
qs <- str_glue(
"SELECT B.mes_matriz, B.rut, A.*, { fields } FROM {tbl} A LEFT JOIN {tblori} B ON A.ID = B.ID",
fields = miniqs,
# fields2 = miniqs2,
tbl = as.character(ddlm$ops$x),
tblori = tblname
) %>%
as.character() %>%
str_replace("FROM", sprintf("INTO %s FROM", name))
qs
message("computing ", name)
dbSendQuery(ddlm$src$con, qs)
dvar <- tbl(ddlm$src$con, name)
dvar
}
#' Function que calcula variables dado una tabla de rut periodo
#'
#' Function que calcula variables dado una tabla de rut periodo y una conexion
#' @param con una conexion odbc
#' @param data data frame con rut y mes_matriz
#' @param periodos vector numerico con periodos a calcular tendencias, resumentes, etc
#' @param collect collect
#' @param calculate_min_mes_matriz calculate_min_mes_matriz
#' @export
get_matriz_vars <- function(con, data, periodos = c(1, 3, 6, 12), collect = TRUE, calculate_min_mes_matriz = FALSE) {
t0 <- Sys.time()
message("Starting at: ", as.character(t0))
dbSendQuery(con, "USE MATRIX")
remove_if_exists(con, "mat_aux")
remove_if_exists(con, "mat_join_dlm")
# dbSendQuery(con, "DBCC SHRINKFILE(MATRIX_log, 1)")
name <- create_name(data, prefix = "mat")
data <- validate_data_matriz(data)
drut <- upload_data(con, data, tblname = name)
dmat <- get_matriz_aux(drut, periodos)
ddlm <- get_matriz_join_dlm(dmat, periodos, tblname = name, calculate_min_mes_matriz = calculate_min_mes_matriz)
dtnd <- get_matriz_tend_by_pers(ddlm, periodos, tblname = name)
if(collect) {
message("Collecting ", name)
dtnd <- collect(dtnd)
}
message("Ending at: ", as.character(Sys.time()))
print(difftime(Sys.time(), t0))
dtnd
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.