Nothing
# documented, memoised versions of these are at the end
# Internal, un-memoised worker behind `malware_hash()`.
#
# Sends a bulk query ("begin" / one hash per line / "end") to the WHOIS-style
# service at hash.cymru.com:43 and parses the reply into a data frame.
#
# Arguments:
#   hashes   vector (or list) of hash strings; flattened with `unlist()` and
#            sent one per line.
#   timeout  connection timeout in seconds, forwarded to `sock()`.
#
# Returns a data frame (classed as `tbl_df`) with columns `sha1_md5`,
# `last_known_timestamp` (POSIXct, GMT) and `detection_pct` (reported value
# divided by 100). When the connection cannot be opened, or no data lines
# come back, a message is emitted and a frame with one all-`NA` row per
# element of `hashes` is returned instead.
.malware_hash <- function(hashes, timeout=getOption("timeout")) {

  host <- "hash.cymru.com"
  port <- 43

  # Placeholder returned on every failure path: one all-NA row per input.
  na_result <- function() {
    out <- data.frame(
      sha1_md5 = rep(NA, length(hashes)),
      last_known_timestamp = rep(NA, length(hashes)),
      detection_pct = rep(NA, length(hashes))
    )
    class(out) <- c("tbl_df", "tbl", "data.frame")
    out
  }

  # setup query
  hashes_c <- paste(unlist(hashes), collapse = "\n")
  cmd <- sprintf("begin\n%s\nend\n", hashes_c)

  # setup connection and post query
  # `sock()` is defined elsewhere in the package; this code relies on it
  # returning a list whose `result` element is the open connection, or NULL
  # when the connection could not be established.
  con <- sock(host = host, port = port, blocking = TRUE, open = "r+",
              timeout = timeout)

  if (is.null(con$result)) {
    message("Error opening connection to hash.cymru.com")
    return(na_result())
  }

  con <- con$result

  # `finally` guarantees the socket is released even if the write or the
  # read raises; the error itself still propagates to the caller.
  response <- tryCatch(
    {
      cat(cmd, file = con)
      readLines(con)
    },
    finally = close(con)
  )

  # The first two response lines are treated as header and discarded. If
  # nothing is left there is nothing to parse (read.table would otherwise
  # fail with "no lines available in input").
  payload <- tail(response, -2)

  if (length(response) == 0 || length(payload) == 0) {
    message("Error reading from connection to hash.cymru.com")
    return(na_result())
  }

  # split fields and convert results
  # `text =` lets read.table create and close its own text connection; a
  # hand-made textConnection() passed as `file` would be left open.
  # NOTE(review): `trim_df()` is a package helper not visible here --
  # presumably trims whitespace in character columns; confirm.
  response <- trim_df(
    read.table(
      text = payload,
      stringsAsFactors = FALSE,
      header = FALSE,
      na.strings = "NO_DATA"
    )
  )

  names(response) <- c("sha1_md5", "last_known_timestamp", "detection_pct")

  # timestamps arrive as seconds since the Unix epoch
  response$last_known_timestamp <- as.POSIXct(
    response$last_known_timestamp, origin = "1970-01-01 00:00:00", tz = "GMT"
  )

  response$detection_pct <- as.numeric(response$detection_pct) / 100

  class(response) <- c("tbl_df", "tbl", "data.frame")

  response

}
#' Retrieves malware hash metadata from the Malware Hash Registry
#'
#' The Malware Hash Registry (MHR) project is a look-up service similar to the
#' Team Cymru IP address to ASN mapping project. This project differs, however,
#' in that you can query the service for a computed MD5 or SHA-1 hash of a file
#' and, if it is malware and the service knows about it, it returns the last
#' time it has seen it along with an approximate anti-virus detection percentage.
#'
#' This function is a memoised wrapper (via \code{memoise::memoise}) around the
#' internal worker, so repeated calls with identical arguments reuse the cached
#' result instead of querying the service again.
#'
#' @param hashes character vector of MD5 or SHA-1 file hashes (hexadecimal
#'        strings), one element per hash to look up
#' @param timeout numeric: the timeout (in seconds) to be used for this connection.
#'        Beware that some OSes may treat very large values as zero: however the
#'        POSIX standard requires values up to 31 days to be supported.
#' @return data frame of Malware Hash Registry lookup results
#'   \itemize{
#'     \item \code{sha1_md5} - hash queried for
#'     \item \code{last_known_timestamp} - last known GMT timestamp associated with that hash
#'     \item \code{detection_pct} - detection rate across a mix of AV packages,
#'           expressed as a fraction (the percentage reported by the service
#'           divided by 100)
#'   }
#'   If a socket connection cannot be made (i.e. a network problem on your
#'   end or a service/network problem on their end), or nothing can be read
#'   from it, all columns will be \code{NA}.
#' @note Attempting to enumerate the malware registry via the public service
#'       interface is not only impractical, it is also strictly prohibited.
#'       Contact Team Cymru if the public interface is insufficient for your
#'       needs and they may be able to come up with an alternative arrangement.
#'       Also, a direct connection to TCP Port 43 (WHOIS) is required for most
#'       of these API functions to work properly.
#' @seealso \url{http://www.team-cymru.org/IP-ASN-mapping.html}
#' @export
#' @examples \dontrun{
#' malware_hash(c("1250ac278944a0737707cf40a0fbecd4b5a17c9d",
#'                "7697561ccbbdd1661c25c86762117613",
#'                "cbed16069043a0bf3c92fff9a99cccdc",
#'                "e6dc4f4d5061299bc5e76f5cd8d16610",
#'                "e1112134b6dcc8bed54e0e34d8ac272795e73d74"))
#' }
malware_hash <- memoise::memoise(.malware_hash)
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.