#' get_blockinfo
#'
#' Fetch basic blockinfos fo a vector of blockhashes and optionally save them into a .csv file
#'
#' For eash blocknumber one line will be added to the data.frame. It contains the blocknumber,
#' blocksize in bytes, blockweight, blocktime, difficulty, number of transactions
#' in the rawmempool, number of verified transaction in this block and the time
#'
#' @param blocknr vector of blocknumbers for which to generate the blockinfo file.
#' @param blockhash vector of blockhashes corresponding to the blocknr.
#' @param writecsv logical, should a .csv file be created.
#' @param filename optional: the name of the file to be created (has to be "xxx.csv").
#'
#' @return a data.frame with the infos of the requested blocks.
#' @export
#'
#' @import data.table
#'
#' @examples
#' \dontrun{
#' blocknr <- 499999
#' blockhash <- "0000000000000000007962066dcd6675830883516bcf40047d42740a85eb2919"
#' get_blockinfo(blocknr = blocknr, blockhash = blockhash)
#' get_blockinfo(blocknr = blocknr, blockhash = blockhash, writecsv = TRUE)
#' get_blockinfo(blocknr = blocknr, blockhash = blockhash, writecsv = TRUE, filename = "hashes.csv")
#' }
get_blockinfo <- function(blocknr, blockhash, writecsv = FALSE, filename) {
# temporary list for results
l <- vector(mode = "list", length = length(blockhash))
for(i in 1:length(blockhash)){
# in case data fetching goes wrong
ind_size <- NA
size <- NA
weight <- NA
time <- NA
while(is.na(ind_size) || is.na(size) || is.na(weight) || is.na(time)){
# fetch raw data including
# safety check because site url can change, depending of SSL-certificate.
url1 <- paste0("http://chainquery.com/bitcoin-api/getblock/", blockhash[i],"/true")
url2 <- paste0("https://chainquery.com/bitcoin-api/getblock/", blockhash[i],"/true")
temp <- try(readLines(url1, warn = FALSE), silent = TRUE)
if(class(temp) == "try-error"){
rawdata <- readLines(url2, warn = FALSE)
} else {
rawdata <- temp
}
# use time and size as anchor points (size before tx, time after tx)
ind_size <- grep("size", rawdata)[2] # 1... strippedsize, 2... size 3/4... help
ind_weight <- ind_size + 1
ind_height <- ind_size + 2
ind_time <- grep("time", rawdata)[1]
ind_difficulty <- ind_time + 4
rawdata_size <- rawdata[ind_size]
rawdata_weight <- rawdata[ind_weight]
rawdata_height <- rawdata[ind_height]
rawdata_time <- rawdata[ind_time]
rawdata_difficulty <- rawdata[ind_difficulty]
size <- substr(rawdata_size, start = unlist(gregexpr(":", rawdata_size)) + 2, stop = nchar(rawdata_size)-1)
weight <- substr(rawdata_weight, start = unlist(gregexpr(":", rawdata_weight)) + 2, stop = nchar(rawdata_weight)-1)
height <- substr(rawdata_height, start = unlist(gregexpr(":", rawdata_height)) + 2, stop = nchar(rawdata_height)-1)
time <- substr(rawdata_time, start = unlist(gregexpr(":", rawdata_time)) + 2, stop = nchar(rawdata_time)-1)
difficulty <- substr(rawdata_difficulty,
start = unlist(gregexpr(":", rawdata_difficulty)) + 2,
stop = nchar(rawdata_difficulty)-1)
}
l[[i]] <- as.numeric(c(height, size, weight, time, difficulty))
print(paste0(i, " ", l[[i]]))
print(i)
}
# turn list into df
df <- do.call(what = rbind, l)
colnames(df) <- c("blocknr", "blocksize", "blockweight", "blocktime", "difficulty")
# generate filepaths of rmp files
filenames_rmp <- paste0("./data/rawmempool/rawmempool_", blocknr,".csv")
# load rmp files
l_rmp <- vector(mode = "list", length = length(filenames_rmp))
for (i in 1:length(filenames_rmp)) {
# only keep one column in order to save memory
l_rmp[[i]] <- data.table::fread(file = filenames_rmp[i],
header = TRUE,
stringsAsFactors = FALSE,
select = "transid", # needed later
data.table = FALSE)
print(i) # think about deleting this
}
# calculate number of tx per block in rmp
nr_tx_rmp <- sapply(l_rmp, nrow)
# get file names valtx
filenames_valtx <- paste0("./data/validated_tx/valtx_", blocknr,".csv")
# load valtx files
l_vtx <- vector(mode = "list", length = length(filenames_valtx))
for (i in 1:length(filenames_valtx)) {
l_vtx[[i]] <- data.table::fread(file = filenames_valtx[i],
header = TRUE,
stringsAsFactors = FALSE,
select = "transid", # needed later
data.table = FALSE)
print(i) # think about deleting this
}
# calculate number of validated tx per block
nr_tx_vtx <- sapply(l_vtx, nrow)
# merge data
# df is still a list
df <- as.data.frame(df)
df$nr_tx_rmp <- nr_tx_rmp
df$nr_tx_vtx <- nr_tx_vtx
df$blocktime_alt <- as.POSIXct(df$blocktime, origin = "1960-01-01")
# get file creation times
df$file_creation_time <- unclass(as.POSIXct(file.mtime(filenames_rmp)))
# watch out for files manualy created at a later point because we got no record for it.
# have to do some data cleaning
# for now i just substract 1 second from the next block which gives us
# at worst a time which is off by 59 seconds but all waiting times should
# be positive.
ind <- which(diff(df$file_creation_time) <= 0)
# need to go backwards and more than one in case more than one block
# is unobservedand got created manually and
while(length(ind) > 0){
for (i in rev(ind)) {
df$file_creation_time[i] <- df$file_creation_time[i + 1] -1
}
ind <- which(diff(df$file_creation_time) <= 0)
}
# calculate how many validated transactions I observed in absolute numbers
temp_res <- rep(0, length(blocknr))
for(i in 1:length(blocknr)){
temp_res[i] <- sum(l_vtx[[i]]$transid %in% l_rmp[[i]]$transid)
}
df$nr_obs_valtx <- temp_res
# save data
if(writecsv){
if(missing(filename)){
file_name <- paste0("./data/blockinfo.csv")
} else{
file_name <- paste0("./data/", filename)
}
write.csv2(df, file = file_name, row.names = FALSE)
}
return(df)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.