knitr::opts_chunk$set(eval = FALSE)

This vignette shows how to use decryptr to break the TRT captcha.

devtools::load_all()

Download images

First, create a folder and download the images.

# Directory that holds the downloaded captcha images.
dest <- '~/data-raw/decryptr/trt'
# Build the full directory path; stay silent when it already exists.
dir.create(path = dest, showWarnings = FALSE, recursive = TRUE)
# Call download_trt() on that directory with n = 600.
result <- download_trt(dest, n = 600)

Image classification

# Identifiers of the images that already carry a label: the part of each file
# name before the first underscore, joined into one alternation regex.
classified <- dir(dest, pattern = '_') %>%
  stringr::str_extract('[^_]+') %>%
  stringr::str_c(collapse = '|')

# Every image whose path does not contain one of those identifiers.
# When nothing has been labelled yet (e.g. right after the first download),
# `classified` is empty; an empty pattern is not a usable regex for
# str_detect(), so in that case all files are kept as they are.
candidates <- dir(dest, full.names = TRUE)
if (isTRUE(nzchar(classified))) {
  candidates <- candidates[!stringr::str_detect(candidates, classified)]
}
not_classified <- read_captcha(candidates)


# NOTE(review): `m` is only created further down in this vignette; this line
# needs a fitted 'captcha' model to be available -- confirm intended order.
predict(m, arq = read_captcha(not_classified[1]))

# classify all images
classify(not_classified)
dest <- '~/data-raw/decryptr/trt'
# Alternation regex built from the text before the first underscore of every
# file name that contains an underscore.
classified <- stringr::str_c(
  stringr::str_extract(dir(dest, pattern = '_'), '[^_]+'),
  collapse = '|'
)

# Read the images whose path contains the text 'FALSE'.
not_classified <- local({
  paths <- dir(dest, full.names = TRUE)
  read_captcha(paths[stringr::str_detect(paths, 'FALSE')])
})


# Prediction of model `m` for the first of them.
predict(m, arq = read_captcha(not_classified[1]))

# Label every selected image.
classify(not_classified)
# Output directory for this run.
dir.create('~/data-raw/decryptr/trt4')

# Bundle the saved keras model with its label set into a 'captcha' object.
model <- keras::load_model('inst/model.keras')
labs <- c("2", "3", "4", "5", "6", "7", "8", "9", "a", "b", "d", "e",
          "f", "h", "j", "k", "m", "n", "r", "s", "t", "u", "v", "w",
          "x", "y")
m <- structure(list(model = model, labs = labs), class = 'captcha')

dest <- '~/data-raw/decryptr/trt4'
# oracle() wrapped so that an error yields FALSE instead of aborting the run.
safe_oracle <- purrr::possibly(oracle, otherwise = FALSE)
p <- progress::progress_bar$new(total = 1000)
# 1000 oracle calls, advancing the progress bar after each one.
purrr::walk(seq_len(1000), function(attempt) {
  safe_oracle(m, dest)
  p$tick()
})

# Read back the files whose path contains 'TRUE' and count them.
not_classified <- local({
  paths <- dir(dest, full.names = TRUE)
  read_captcha(paths[stringr::str_detect(paths, 'TRUE')])
})
length(not_classified)

Data preparation

# Labelled images (file name contains an underscore) from the first folder.
dest <- "~/data-raw/decryptr/trt"
classified1 <- dir(dest, pattern = '_', full.names = TRUE)

# From the second folder keep only labelled images whose path contains 'TRUE'.
dest3 <- "~/data-raw/decryptr/trt3"
classified2 <- local({
  labelled <- dir(dest3, pattern = '_', full.names = TRUE)
  labelled[stringr::str_detect(labelled, 'TRUE')]
})

# Read both sets together, then prepare them and inspect the structure.
classified <- read_captcha(c(classified1, classified2))

prepared_data <- prepare(classified)
str(prepared_data)
# Label set: one single-character string per symbol, in this exact order.
labs <- strsplit("23456789abdefhjkmnrstuvwxy", split = "")[[1]]
m <- structure(list(model = model, labs = labs), class = 'captcha')

# Labelled files in `dest3` whose path contains 'FALSE'.
classified3 <- local({
  labelled <- dir(dest3, pattern = '_', full.names = TRUE)
  labelled[stringr::str_detect(labelled, 'FALSE')]
})

# Manual stepping: re-run from the increment onwards to move to the next file.
i <- 0
i <- i + 1
plot(read_captcha(classified3[i]))
predict(m, arq = read_captcha(classified3[i]))

Model!

Test, train, validation

# Fix the RNG seed before fitting so the run can be repeated.
set.seed(19910402)
library(keras)
# Call model() on the prepared data with 100 epochs; the result replaces `m`.
# NOTE(review): `model` is also the name of a keras object created earlier in
# this vignette; R skips non-function bindings when resolving a call, so this
# presumably reaches the package's model() function -- confirm.
m <- model(prepared_data, epochs = 100)

Predict

# Sibling directory of `dest` (same path with a trailing '2').
dest2 <- paste0(dest, '2')
dir.create(dest2)
# Ten download_trt() calls into that directory, with a text progress bar.
plyr::l_ply(
  seq_len(10),
  function(round) download_trt(dest2),
  .progress = 'text'
)
arqs <- dir(dest2, full.names = TRUE)
# Draw one file at random, read it, show it and predict it with `m`.
a <- arqs %>%
  sample(1) %>%
  read_captcha()
plot(a)
predict(m, arq = a)

Results

probs <- predict(model, x_valid)
# Index of the largest value along the third dimension, for every cell of the
# first two dimensions.
predicoes <- apply(probs, MARGIN = c(1, 2), FUN = which.max)
y_valid_obs <- apply(y_valid, MARGIN = c(1, 2), FUN = which.max)
# Mean absolute difference between predictions and targets.
mean(abs(probs - y_valid))
# Share of matching indices overall, then separately for columns 1 to 6.
mean(predicoes == y_valid_obs)
purrr::map_dbl(
  1:6,
  function(pos) mean(predicoes[, pos] == y_valid_obs[, pos])
)
# confusion matrix
nm <- colnames(y_valid[1,,])
dplyr::count(
  tibble::tibble(
    y_pred = as.vector(apply(predicoes, 1, function(idx) nm[idx])),
    y_obs = as.vector(apply(y_valid_obs, 1, function(idx) nm[idx]))
  ),
  y_obs, y_pred
) %>%
  tidyr::spread(y_pred, n, fill = '.') %>%
  View()

library(decryptr)
#' Label one captcha image with the answer returned by the DBC HTTP API
#'
#' Uploads the image file (base64-encoded) to `api.dbcapi.me`, retries while no
#' usable answer comes back, and then labels the image through
#' `decryptr::classify()` using the lower-cased answer.
#'
#' @param arq Path to the captcha image file.
#' @param user,pass Credentials sent with every request.
#' @param ntry Maximum number of retries after the first request (so at most
#'   `ntry + 1` requests are made). Defaults to 10.
#' @return `TRUE`, invisibly. An error is raised when no answer was obtained
#'   after all retries.
classify_dbc_one <- function(arq, user, pass, ntry = 10) {
  base64 <- arq %>% 
    readr::read_file_raw() %>% 
    base64enc::base64encode() %>% 
    sprintf(fmt = 'base64:%s', .)
  captcha_data <- list(username = user, password = pass, captchafile = base64)
  # NOTE(review): credentials travel over plain HTTP here -- check whether the
  # service offers an HTTPS endpoint.
  url_captcha <- 'http://api.dbcapi.me/api/captcha'

  # TRUE only for a 200 response whose parsed body has a non-empty `text`.
  # The explicit checks keep the test a single TRUE/FALSE even when `text` is
  # missing from the response.
  has_answer <- function(status, ct) {
    status == 200 &&
      is.list(ct) &&
      is.character(ct$text) &&
      length(ct$text) == 1 &&
      !is.na(ct$text) &&
      nzchar(ct$text)
  }

  retries <- 0
  repeat {
    r_captcha <- httr::POST(url_captcha, body = captcha_data)
    status <- httr::status_code(r_captcha)
    ct <- httr::content(r_captcha)
    if (has_answer(status, ct) || retries >= ntry) {
      break
    }
    retries <- retries + 1
    Sys.sleep(.5)
  }

  # Never label an image with an empty or missing answer.
  if (!has_answer(status, ct)) {
    stop(
      "no captcha answer received for '", arq, "' after ", retries,
      " retries (last HTTP status: ", status, ")",
      call. = FALSE
    )
  }

  arq_captcha <- decryptr::read_captcha(arq)
  decryptr::classify(arq_captcha, answer = tolower(ct$text))
  invisible(TRUE)
}
#' Label several captcha files through `classify_dbc_one()`
#'
#' @param arqs Paths of the captcha image files.
#' @param user,pass Credentials forwarded to `classify_dbc_one()`.
#' @return A logical vector with one element per file: the value returned by
#'   `classify_dbc_one()`, or `FALSE` when that call raised an error.
classify_dbc <- function(arqs, user, pass) {
  # Errors in a single file become FALSE instead of stopping the whole batch.
  safe_one <- purrr::possibly(classify_dbc_one, otherwise = FALSE)
  bar <- progress::progress_bar$new(total = length(arqs))
  purrr::map_lgl(arqs, function(arq) {
    outcome <- safe_one(arq, user, pass)
    bar$tick()
    outcome
  })
}
# download_trt(dest = '~/data-raw/decryptr/trt_dbc/', 4999)
# Leading [a-z0-9]+ run of every labelled file name (name contains an
# underscore), joined into one alternation regex. stringr::str_c() is used for
# the join, as elsewhere in this vignette; glue::collapse() is the old name of
# glue_collapse() and is not available in current glue releases.
arqs_class <- '~/data-raw/decryptr/trt_dbc' %>% 
  dir(full.names = TRUE, pattern = "_") %>% 
  basename() %>% 
  stringr::str_extract('[a-z0-9]+') %>% 
  stringr::str_c(collapse = '|')

# Read the files whose path matches none of those identifiers.
arqs <- dir('~/data-raw/decryptr/trt_dbc', full.names = TRUE) %>% 
  magrittr::extract(!stringr::str_detect(., arqs_class)) %>% 
  read_captcha()

classify(arqs)
library(decryptr)
dest_dir <- '~/data-raw/decryptr/trt'
# Labelled files (name contains an underscore) with a size above zero bytes.
classified_captchas <- local({
  labelled <- dir(dest_dir, full.names = TRUE, pattern = '_')
  read_captcha(labelled[file.size(labelled) > 0])
})

# Disabled scratch code, kept for reference.
# purrr::walk(seq_along(classified_captchas), ~{
#   print(.x)
#   prepare(read_captcha(classified_captchas[c(1, .x)]))
# })
# 
# classified_captchas[2303] %>% 
#   read_captcha() %>% file.remove()
# 
# a <- classified_captchas %>% 
#   stringr::str_subset('WRONG')
# b <- classified_captchas %>% 
#   stringr::str_subset('_ ') %>% 
#   stringr::str_replace_all('_ +', '_')
# file.rename(a, b)

# Prepare the data, inspect its structure and store it next to the vignette.
prepared_trt <- prepare(classified_captchas)
str(prepared_trt)
saveRDS(prepared_trt, file = '~/decryptr/decryptr/vignettes/prepared_trt.rds')
dest_dir <- '~/data-raw/decryptr/trt_oracle'
pat <- 'FALSE'
# Files whose name contains 'FALSE'.
classified <- dir(dest_dir, pattern = pat, full.names = TRUE) %>% 
  sort()
# Matching '<id>.jpeg' path for each of them, where <id> is the leading
# [a-z0-9]+ run of the file name. It is derived element by element from
# `classified`, so position i in both vectors always refers to the same image
# (two independently sorted vectors are not guaranteed to line up).
classified_original <- classified %>% 
  basename() %>% 
  stringr::str_extract('[a-z0-9]+') %>% 
  sprintf('%s/%s.jpeg', dest_dir, .)
# seq_along() makes this a no-op when nothing matched; 1:length(x) would run
# the body for i = 1 and i = 0 on an empty vector.
for (i in seq_along(classified)) {
  classify(read_captcha(classified_original[i]))
  file.remove(classified[i])
}
# Remove every file whose name has no underscore.
'~/data-raw/decryptr/trt_oracle' %>% 
  dir(full.names = TRUE, pattern = '^[^_]+$') %>% 
  file.remove()
# Drop the 'TRUE' marker from the remaining file names.
a <- '~/data-raw/decryptr/trt_oracle' %>% 
  dir(full.names = TRUE, pattern = 'TRUE')
b <- a %>% stringr::str_replace_all('_TRUE_', '_')
file.rename(a, b)

Classifying with the oracle

# 'captcha' object: label names taken from the third dimension of the prepared
# targets, plus a keras model loaded from an HDF5 file.
# NOTE(review): the labels come from `prepared_trt` while the file is named
# 'rfb-03' -- confirm the two belong together.
model <- structure(
  list(
    labs = dimnames(prepared_trt$y)[[3]],
    model = load_model_hdf5('~/decryptr/decryptrModels/inst/keras/rfb-03.hdf5')
  ),
  class = 'captcha'
)



# Download one image into 'tests_rfb' and read it.
arq <- read_captcha(download_rfb('tests_rfb', 1))
plot(arq)
# Predict it (the parentheses print the value), then label it with the answer.
(x <- predict(model, arq = arq))
classify(arq, answer = x)

# Half the number of entries in the folder.
length(list.files('tests_rfb')) / 2






oracle_dir <- '~/data-raw/decryptr/trt_oracle'
# oracle() wrapped so that an error yields the string 'erro'.
safe_oracle <- purrr::possibly(oracle, otherwise = 'erro')
# 17 rounds: pause one second, call the oracle, print its result, and pause
# ten more seconds whenever the result is anything other than TRUE.
res <- purrr::map_chr(seq_len(17), function(round) {
  Sys.sleep(1)
  outcome <- safe_oracle(model, oracle_dir)
  cat(outcome)
  if (!isTRUE(outcome)) {
    cat('...bora esperar...')
    Sys.sleep(10)
  }
  cat('\n')
  as.character(outcome)
})


# File counts: names containing 'TRUE', names containing 'FALSE', and half of
# all entries.
length(list.files(oracle_dir, pattern = 'TRUE'))
length(list.files(oracle_dir, pattern = 'FALSE'))
length(list.files(oracle_dir)) / 2


marceloturim/decryptr documentation built on May 14, 2019, 5:26 p.m.