-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathfixindex.py
executable file
·69 lines (49 loc) · 1.76 KB
/
fixindex.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
#!/usr/bin/python3
import sys
"""Takes a single raw MARC record as input and attempts to fix its index."""
# When True, fix_index() prints the directory entries it parsed ("ORIGINAL
# INDEX") before they are corrected.
DEBUG = False
def step_through_and_fix(index, data):
    """Check the index represents the data, and fix if not.

    Walks the directory entries in order.  Each entry's offset is rewritten
    to the running offset computed from the preceding field lengths.  If an
    entry's stated length does not land on a 0x1e separator, the length is
    extended forward to the next separator.

    Args:
        index: List of mutable ``[tag, length, offset]`` entries whose
            ``length`` item parses with ``int()`` (e.g. ``b'0012'``).  The
            entries are modified in place.
        data: ``bytes``/``bytearray`` whose first byte is the 0x1e separator
            that closes the directory, followed by the field data.

    Returns:
        The same ``index`` list, with offsets and lengths corrected.

    Raises:
        ValueError: If the running offset does not point at a separator, if
            no separator can be found for a field, or if a corrected value
            no longer fits the fixed-width directory columns.
    """
    separator = 0x1e
    calculated_offset = 0
    for entry in index:
        if calculated_offset > 99999:
            raise ValueError(
                'offset %d of field %r does not fit in 5 digits'
                % (calculated_offset, entry[0]))
        # Set current offset to calculated offset.
        entry[2] = ('%05d' % calculated_offset).encode('utf-8')

        # Explicit check rather than ``assert`` so that it still runs under
        # ``python -O`` and reports a meaningful error on short data.
        if (calculated_offset >= len(data)
                or data[calculated_offset] != separator):
            raise ValueError(
                'expected field separator at data offset %d (field %r)'
                % (calculated_offset, entry[0]))

        # The stated length should land on the separator that ends this
        # field.  If it does not, scan forward from there to the next one
        # (one C-level search instead of re-encoding the length per byte).
        stated_end = calculated_offset + int(entry[1])
        actual_end = data.find(separator, stated_end)
        if actual_end == -1:
            raise ValueError(
                'no field separator found for field %r' % (entry[0],))

        if actual_end != stated_end:
            fixed_len = actual_end - calculated_offset
            if fixed_len > 9999:
                raise ValueError(
                    'length %d of field %r does not fit in 4 digits'
                    % (fixed_len, entry[0]))
            # Only rewrite the length when it actually changed, so that
            # already-correct entries are passed through byte-for-byte.
            entry[1] = ('%04d' % fixed_len).encode('utf-8')

        calculated_offset = actual_end
    return index
def recreate_index(index):
    """Serialise directory entries back into the binary index.

    Args:
        index: Iterable of ``[tag, tag_len, offset]`` entries, each item
            being ``bytes``.

    Returns:
        ``bytes``: every entry's tag, length and offset concatenated in
        order; ``b''`` for an empty index.
    """
    # join() builds the result in one pass instead of re-copying the
    # accumulated bytes on every ``+=``.
    return b''.join(entry[0] + entry[1] + entry[2] for entry in index)
def fix_index(f):
    """Read one raw MARC record from ``f`` and return it with a fixed index.

    The 24-byte leader is read first, then 12-byte directory entries
    (3-byte tag, 4-byte length, 5-byte offset) until a chunk starting with
    the 0x1e terminator is reached.  Everything from that terminator to the
    end of the file is treated as the data section.  The leader and data
    section are returned unchanged; only the directory is rewritten.

    Args:
        f: Seekable binary file object positioned anywhere; it is rewound
            to the start before reading.

    Returns:
        ``bytes``: leader + corrected directory + data section.

    Raises:
        ValueError: If the file ends before a complete leader or a
            terminated directory has been read.
    """
    f.seek(0)
    leader = f.read(24)
    if len(leader) < 24:
        raise ValueError('truncated record: leader is shorter than 24 bytes')

    index = []
    while True:
        tag = f.read(3)
        if not tag:
            raise ValueError(
                'truncated record: directory has no 0x1e terminator')
        if tag[0] == 0x1e:
            break
        tag_len = f.read(4)
        offset = f.read(5)
        if len(tag) != 3 or len(tag_len) != 4 or len(offset) != 5:
            raise ValueError('truncated record: incomplete directory entry')
        index.append([tag, tag_len, offset])

    # Back to the end of the index, at the 0x1E byte.  Rewind by the number
    # of bytes actually read: near EOF the last read may return fewer than
    # three bytes, and a fixed -3 would land inside the directory.
    f.seek(-len(tag), 1)
    data = f.read()  # read rest of file (data section)

    if DEBUG:
        # stderr, because stdout carries the binary record when run as a
        # script.
        print("ORIGINAL INDEX: %s" % index, file=sys.stderr)

    fixed = step_through_and_fix(index, data)
    return leader + recreate_index(fixed) + data
if __name__ == '__main__':
    # Usage: fixindex.py FILE  -- writes the corrected record to stdout.
    if len(sys.argv) < 2:
        # sys.exit() with a string prints it to stderr and exits non-zero,
        # instead of failing with a bare IndexError on sys.argv[1].
        sys.exit('usage: %s MARC_FILE' % sys.argv[0])
    filename = sys.argv[1]  # binary MARC filename to read
    with open(filename, 'rb') as f:
        fixed_marc = fix_index(f)
    # The record is binary, so bypass the text layer of stdout.
    sys.stdout.buffer.write(fixed_marc)