-
Notifications
You must be signed in to change notification settings - Fork 192
/
blockchain.py
257 lines (213 loc) · 9.68 KB
/
blockchain.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
# Copyright (C) 2015-2016 The bitcoin-blockchain-parser developers
#
# This file is part of bitcoin-blockchain-parser.
#
# It is subject to the license terms in the LICENSE file found in the top-level
# directory of this distribution.
#
# No part of bitcoin-blockchain-parser, including this file, may be copied,
# modified, propagated, or distributed except according to the terms contained
# in the LICENSE file.
import os
import mmap
import struct
import pickle
import stat
import plyvel
from blockchain_parser.transaction import Transaction
from blockchain_parser.index import DBTransactionIndex
from blockchain_parser import utils
from binascii import unhexlify
from binascii import hexlify
from .block import Block
from .index import DBBlockIndex
from .utils import format_hash
from .block_header import BlockHeader
# Constant separating blocks in the .blk files
BITCOIN_CONSTANT = b"\xf9\xbe\xb4\xd9"
def get_files(path):
    """
    Return the sorted list of blk*.dat files found directly in ``path``.

    ``path`` is normally the directory holding the block files. When it is
    not a directory, it is returned unchanged as a single-element list.
    """
    mode = os.stat(path)[stat.ST_MODE]
    if not stat.S_ISDIR(mode):
        return [path]
    return sorted(
        os.path.join(path, name)
        for name in os.listdir(path)
        if name.startswith("blk") and name.endswith(".dat")
    )
def get_undo_files(path):
    """
    Return the sorted list of rev*.dat files found directly in ``path``.

    ``path`` is normally the directory holding the undo files. When it is
    not a directory, it is returned unchanged as a single-element list.
    """
    mode = os.stat(path)[stat.ST_MODE]
    if not stat.S_ISDIR(mode):
        return [path]
    return sorted(
        os.path.join(path, name)
        for name in os.listdir(path)
        if name.startswith("rev") and name.endswith(".dat")
    )
def get_blocks(blockfile):
    """
    Given the name of a .dat file, yield the raw bytes of every block
    contained in the file.

    The file is scanned for the BITCOIN_CONSTANT marker; the four bytes
    following the marker are read as a little-endian unsigned block size
    and that many bytes are yielded as one block. Bytes that are not part
    of a marker are skipped one at a time.

    An empty file yields nothing. The memory map is released even when the
    caller stops iterating before the generator is exhausted.
    """
    with open(blockfile, "rb") as f:
        # mmap refuses to map a zero-length file, and there is nothing to
        # parse in it anyway.
        if os.fstat(f.fileno()).st_size == 0:
            return
        # access=ACCESS_READ is accepted on both Windows and Unix, and a
        # length of 0 maps the whole file.
        raw_data = mmap.mmap(f.fileno(), 0, access=mmap.ACCESS_READ)
        try:
            length = len(raw_data)
            offset = 0
            while offset < (length - 4):
                if raw_data[offset:offset + 4] == BITCOIN_CONSTANT:
                    offset += 4
                    block_size = struct.unpack(
                        "<I", raw_data[offset:offset + 4])[0]
                    # Skip the size field and the block payload
                    offset += 4 + block_size
                    yield raw_data[offset - block_size:offset]
                else:
                    offset += 1
        finally:
            # Runs on normal exhaustion, on error and when the generator is
            # closed early, so the mapping never outlives the iteration.
            raw_data.close()
def get_block(blockfile, offset):
    """Read and return the raw bytes of the one block located at ``offset``
    inside ``blockfile``.
    """
    with open(blockfile, "rb") as handle:
        # The 4-byte little-endian size field sits right before the offset
        handle.seek(offset - 4)
        (block_size,) = struct.unpack("<I", handle.read(4))
        payload = handle.read(block_size)
    return payload
class Blockchain(object):
    """Represent the blockchain contained in the series of .blk files
    maintained by bitcoind.
    """

    def __init__(self, path):
        """Store ``path``, the directory containing the blk*.dat files."""
        self.path = path

    def get_unordered_blocks(self):
        """Yields the blocks contained in the .blk files as is,
        without ordering them according to height.
        """
        for blk_file in get_files(self.path):
            for raw_block in get_blocks(blk_file):
                yield Block(raw_block, None, os.path.split(blk_file)[1])

    def _index_confirmed(self, chain_indexes, num_confirmations=6):
        """Check if the first block index in "chain_indexes" has at least
        "num_confirmations" (6 by default) blocks chained on top of it,
        counting the block itself.

        Returns True when such a chain is found. Returns False when a block
        index without data on disk is reached first, or when the indexes run
        out before the chain is long enough; the block is then treated as an
        orphan.
        """
        # chains holds a 2D list of sequential block hash chains.
        # As soon as one of them reaches num_confirmations elements we can
        # decide whether the block in question is confirmed by checking if
        # its hash is in that chain.
        chains = []

        # this is the block in question
        first_block = None

        # loop through all future blocks
        for i, index in enumerate(chain_indexes):
            # if this block doesn't have data don't confirm it
            if index.file == -1 or index.data_pos == -1:
                return False

            # parse the block
            blk_file = os.path.join(self.path, "blk%05d.dat" % index.file)
            block = Block(get_block(blk_file, index.data_pos))

            if i == 0:
                first_block = block
                chains.append([block.hash])

            for chain in chains:
                # if this block can be appended to an existing block in one
                # of the chains, do it
                if chain[-1] == block.header.previous_block_hash:
                    chain.append(block.hash)

                # once a chain reaches num_confirmations entries we are
                # ready to decide whether the block belongs to a fork or to
                # the main chain
                if len(chain) == num_confirmations:
                    return first_block.hash in chain

        # ran out of block indexes before reaching num_confirmations
        return False

    def _load_block_indexes(self, index, cache=None):
        """Return the block index entries sorted by ascending height.

        The entries are read from the pickle file "cache" when it exists,
        otherwise from the leveldb block index at path "index". When "cache"
        is given and does not exist yet, the freshly built list is written
        to it.
        """
        block_indexes = None

        if cache and os.path.exists(cache):
            # load the block index cache from a previous run
            # NOTE: pickle can execute arbitrary code while loading; only
            # point "cache" at a file this program wrote itself.
            with open(cache, 'rb') as f:
                block_indexes = pickle.load(f)

        built_from_db = block_indexes is None
        if built_from_db:
            with plyvel.DB(index, compression=None) as db:
                # Block index entries are stored with keys prefixed by 'b'
                with db.iterator(prefix=b'b') as iterator:
                    block_indexes = [
                        DBBlockIndex(format_hash(k[1:]), v)
                        for k, v in iterator
                    ]

        # leveldb iterates in key order, i.e. by block hash. Everything
        # downstream (duplicate-height detection, start/end slicing) needs
        # the entries ordered by height, so sort them here. Caches are
        # sorted too in case they were written unsorted.
        block_indexes = sorted(block_indexes, key=lambda idx: idx.height)

        if built_from_db and cache and not os.path.exists(cache):
            # cache the block index for re-use next time
            with open(cache, 'wb') as f:
                pickle.dump(block_indexes, f)

        return block_indexes

    def get_ordered_blocks(self, index, start=0, end=None, cache=None):
        """Yields the blocks contained in the .blk files as per
        the height extracted from the leveldb index present at path
        index maintained by bitcoind.

        "start" and "end" slice the height-ordered list of block indexes.
        When "end" is lower than "start" the list is walked in reverse.
        "cache" is an optional path to a pickle file used to store the
        block index between runs. Iteration stops at the first block index
        that has no data on disk.
        """
        block_indexes = self._load_block_indexes(index, cache)

        # Occasionally a node will receive two different solutions to a block
        # at the same time. The node saves both to disk, not pruning the
        # block that leads to a shorter chain once the fork is settled without
        # "-reindex"ing the bitcoind block data. This sometimes leads to
        # two blocks with the same height in the database.
        # We throw out blocks that don't have at least 6 other blocks on top
        # of them (6 confirmations).

        # hashes of blocks that are stale; a set keeps the filter below
        # linear in the number of block indexes
        stale_blocks = set()
        last_height = -1
        for i, block_idx in enumerate(block_indexes):
            # if this block is the same height as the last block an orphan
            # occurred, now we have to figure out which of the two to keep
            if last_height > -1 and block_idx.height == last_height:
                # look through future blocks for a chain 6 blocks long that
                # includes this block
                if self._index_confirmed(block_indexes[i:]):
                    # if this block is confirmed, the unconfirmed block is
                    # the previous one. Remove it.
                    stale_blocks.add(block_indexes[i - 1].hash)
                else:
                    # if this block isn't confirmed, remove it.
                    stale_blocks.add(block_idx.hash)
            last_height = block_idx.height

        # filter out stale blocks, so we are left only with block indexes
        # that have been confirmed
        # (or are new enough that they haven't yet been confirmed)
        block_indexes = [
            block_idx for block_idx in block_indexes
            if block_idx.hash not in stale_blocks
        ]

        if end is None:
            end = len(block_indexes)

        if end < start:
            block_indexes = list(reversed(block_indexes))
            start = len(block_indexes) - start
            end = len(block_indexes) - end

        for blk_idx in block_indexes[start:end]:
            if blk_idx.file == -1 or blk_idx.data_pos == -1:
                break
            blk_file = os.path.join(self.path, "blk%05d.dat" % blk_idx.file)
            yield Block(get_block(blk_file, blk_idx.data_pos), blk_idx.height)

    def get_transaction(self, txid, db):
        """Returns the transaction contained in the .blk files as python
        objects, similar to
        https://developer.bitcoin.org/reference/rpc/getrawtransaction.html

        "txid" is the transaction id as a hexadecimal string and "db" is an
        object whose get(key) returns the raw transaction index entry.
        The result is the list [block_header, transaction], or None when
        the txid is not present in "db" or the data could not be parsed.
        """
        # Transaction index keys are b't' followed by the byte-reversed txid
        tx_key = b't' + bytes.fromhex(txid)[::-1]

        raw_index = db.get(tx_key)
        if raw_index is None:
            # unknown transaction id
            return None

        tx_idx = DBTransactionIndex(utils.format_hash(tx_key), raw_index)
        blk_file = os.path.join(self.path, "blk%05d.dat" % tx_idx.blockfile_no)
        raw_block = get_block(blk_file, tx_idx.file_offset)
        offset = tx_idx.block_offset

        # The first 80 bytes are the block header, the rest is transactions
        block_header_data = raw_block[:80]
        transaction_data = raw_block[80:]

        # The header does not depend on the slice width, parse it once.
        # Parsing is best-effort: any failure is reported as None.
        try:
            block_header = BlockHeader.from_hex(block_header_data)
        except Exception:
            return None

        # The length of the transaction is not known up front, so try slice
        # widths from 1024 (1KiB) up to 536870912 (512MiB), doubling each
        # time, until the transaction parses.
        for j in range(0, 20):
            offset_e = offset + (1024 * 2 ** j)
            try:
                transaction = Transaction.from_hex(
                    transaction_data[offset:offset_e])
            except Exception:
                continue
            return [block_header, transaction]

        return None