Skip to content

Commit a23e63e

Browse files
committed
Updated docs. Updated sbxenc to use seqbox module. tweaks on the library.
1 parent b710094 commit a23e63e

File tree

3 files changed

+70
-94
lines changed

3 files changed

+70
-94
lines changed

notes.txt

+33-16
Original file line numberDiff line numberDiff line change
@@ -1,37 +1,42 @@
11
SeqBox - Sequenced Box container (SEQBOX/SBX)
22
===========================================
33

4-
Encode a file in a container that can be reconstructed even after total loss of file system structures.
5-
Use a blocksize equal or submultiple of a sector/cluster size, with a minimal header that include block
6-
sequence number, checksum and other info.
7-
Additional, non critical info are contained in block 0 (like name, file size, other attributes, etc.).
4+
Encode a file in a container that can be reconstructed even after total
5+
loss of file system structures. Use a blocksize equal to, or a submultiple of, a
6+
sector/cluster size, with a minimal header that includes the block sequence
7+
number, checksum and other info.
8+
Additional, non-critical info/metadata is contained in block 0 (like name,
9+
file size, other attributes, etc.).
810

9-
Recovery can be performed simply scanning a disk / image, reading sector/cluster sized slices and checking
10-
block signature and then CRC to detect SeqBox blocks. Then blocks can be sorted by UID's and sequence numbers.
11+
Recovery can be performed simply by scanning a disk / image, reading
12+
sector/cluster-sized slices and checking the block signature and then the CRC to
13+
detect valid SeqBox blocks. Then blocks can be sorted by UIDs and sequence
14+
numbers.
1115

12-
Optionally blocks can be freely duplicated and/or stored in different media to enhance recoverability.
13-
Eventually even a simil-RAID system could be implemented between blocks.
16+
Optionally, blocks can be freely duplicated and/or stored on different media
17+
to enhance recoverability.
1418

1519
The UID can be anything, as long as it is unique for the specific application.
16-
It could be random generated, or a hash of the file content, or a simple sequence, etc.
20+
It could be randomly generated, or a hash of the file content, or a simple
21+
sequence, etc. For the tools it is just a sequence of bytes.
1722

1823
Overhead is minimal: from 16B/512B (+1 512B block) to 16B/32KB (+1 32KB block)
1924

2025
Could become part of a File System.
2126

22-
2327
Command line tools to:
24-
- encode file to SBX
25-
- decode SBX to file
26-
- utility to get info and scramble / sort / test / damage SBX file
27-
- recovery/scan tool to find/list and recover series of RBX files from a file (image/device)
28+
- sbxenc: encode file to SBX
29+
- sbxdec: decode SBX to file (and also test or get info)
30+
- sbxscan: scan files to build an SQLite db of block positions, numbers, UIDs
31+
and a detailed log (in various formats, to enable other tools)
32+
- sbxrec: rebuild sbx files using previous scanned info
2833

2934

3035
Common blocks header:
3136

3237
0- 2 3 Recoverable Block signature = 'SBx'
3338
3- 3 1 Version byte
34-
4- 5 2 Block CRC-16
39+
4- 5 2 Block CRC-16 (Version is used as starting value)
3540
6- 11 6 file UID (truncated MD5 or other 48-bit hash)
3641
12- 15 4 Block sequence number
3742

@@ -43,10 +48,22 @@ Block sequence = 0
4348
22- 29 8 file length
4449
30-284 255 file/dir name (UTF-8)
4550
285-316 32 file content crypto hash (optional)
46-
317-blksize padding
51+
317-nnn nnn encoded metadata
52+
nnn-blksize padding
4753
------------------------
4854

4955
Block sequence > 0:
5056

5157
16-blocksize = data
58+
------------------------
59+
60+
metadata encoding:
61+
62+
3 bytes str ID + 1 byte length + data
63+
64+
ID:
5265

66+
FNM filename (utf-8)
67+
SNM sbx filename (utf-8)
68+
FSZ filesize 8 bytes
69+
HSH SHA256 crypto hash

sbxenc.py

+21-65
Original file line numberDiff line numberDiff line change
@@ -22,14 +22,7 @@
2222

2323
import seqbox
2424

25-
PROGRAM_VER = "0.03a"
26-
27-
def errexit(errlev=1, mess=""):
28-
"""Display an error and exit."""
29-
if mess != "":
30-
print("%s: error: %s" % (os.path.split(sys.argv[0])[1], mess))
31-
sys.exit(errlev)
32-
25+
PROGRAM_VER = "0.04a"
3326

3427
def get_cmdline():
3528
"""Evaluate command line parameters, usage & help."""
@@ -58,12 +51,7 @@ def banner():
5851
def usage():
5952
print("""usage:
6053
61-
seqbox e file.sbx file encode file in file.sbx
62-
seqbox d file.sbx file decode file from file.sbx
63-
seqbox i file.sbx show information on file.sbx
64-
seqbox t file.sbx test file.sbx for integrity
65-
seqbox r [-d path] filenames [filenames ...] recover sbx files from filenames
66-
and store in path
54+
seqbox <file> [file.sbx] encode file in file.sbx
6755
""")
6856

6957
def getcmdargs():
@@ -76,19 +64,23 @@ def getcmdargs():
7664
usage()
7765
errexit(0)
7866

79-
res["cmd"] = sys.argv[1].lower()
80-
81-
if res["cmd"] in ["e"]:
82-
if len(sys.argv) == 4:
83-
res["sbxfile"] = sys.argv[2]
84-
res["file"] = sys.argv[3]
85-
else:
86-
usage()
87-
errexit(1)
88-
else:
89-
errexit(1, "command %s not yet implemented." % res["cmd"])
90-
67+
if len(sys.argv) > 1:
68+
res["file"] = sys.argv[1]
69+
res["sbxfile"] = res["file"] + ".sbx"
70+
if len(sys.argv) > 2:
71+
res["sbxfile"] = sys.argv[2]
72+
if len(sys.argv) > 3:
73+
usage()
74+
errexit(1)
75+
9176
return res
77+
78+
79+
def errexit(errlev=1, mess=""):
80+
"""Display an error and exit."""
81+
if mess != "":
82+
print("%s: error: %s" % (os.path.split(sys.argv[0])[1], mess))
83+
sys.exit(errlev)
9284

9385

9486
def main():
@@ -99,18 +91,19 @@ def main():
9991

10092
filename = cmdline["file"]
10193
sbxfilename = cmdline["sbxfile"]
102-
94+
10395
print("reading %s..." % filename)
10496
filesize = os.path.getsize(filename)
10597
sha256 = getsha256(filename)
10698
fin = open(filename, "rb")
10799
fout = open(sbxfilename, "wb")
108100

109-
sbx = seqbox.sbxBlock()
101+
sbx = seqbox.sbxBlock(uid=b'uiduid')
110102

111103
#write block 0
112104
sbx.metadata = {"filesize":filesize,
113105
"filename":filename,
106+
"sbxname":sbxfilename,
114107
"hash":sha256}
115108
fout.write(sbx.encode())
116109

@@ -131,43 +124,6 @@ def main():
131124

132125
print("\nok!")
133126

134-
######################
135-
136-
cmdline["sbxfile"] = r"c:\t\test.sbx"
137-
cmdline["filename"] = r"c:\t\out.zip"
138-
139-
print("\nTesting...")
140-
141-
sbxfilename = cmdline["sbxfile"]
142-
filename = cmdline["filename"]
143-
fin = open(sbxfilename, "rb")
144-
fout= open(filename, "wb")
145-
146-
sbx = seqbox.sbxBlock()
147-
lastblocknum = 0
148-
d = hashlib.sha256()
149-
while True:
150-
buffer = fin.read(sbx.blocksize)
151-
if len(buffer) < sbx.blocksize:
152-
break
153-
if not sbx.decode(buffer):
154-
errexit(errlev=1, mess="Invalid block.")
155-
else:
156-
print("Block #",sbx.blocknum)
157-
if sbx.blocknum == 0:
158-
#get metadata
159-
pass
160-
else:
161-
fout.write(sbx.data)
162-
d.update(sbx.data)
163-
164-
fout.close()
165-
fin.close()
166-
167-
print("File decoded.")
168-
print("Hash:", d.hexdigest())
169-
170-
171127

172128
if __name__ == '__main__':
173129
main()

seqbox.py

+16-13
Original file line numberDiff line numberDiff line change
@@ -35,9 +35,9 @@ def __init__(self, ver=0, uid="r"):
3535

3636
if uid == "r":
3737
random.seed()
38-
self.uid = random.getrandbits(48)
38+
self.uid = random.getrandbits(6*8).to_bytes(6, byteorder='big')
3939
else:
40-
self.uid = 0
40+
self.uid = (b'\x00'*6 + uid)[-6:]
4141

4242
self.parent_uid = 0
4343
self.metadata = {}
@@ -52,19 +52,22 @@ def encode(self):
5252
self.data = b""
5353
if "filename" in self.metadata:
5454
bb = self.metadata["filename"].encode()
55-
self.data += b"NM" + len(bb).to_bytes(1, byteorder='little') + bb
55+
self.data += b"FNM" + len(bb).to_bytes(1, byteorder='big') + bb
56+
if "sbxname" in self.metadata:
57+
bb = self.metadata["sbxname"].encode()
58+
self.data += b"SNM" + len(bb).to_bytes(1, byteorder='big') + bb
5659
if "filesize" in self.metadata:
57-
bb = self.metadata["filesize"].to_bytes(8, byteorder='little', signed=True)
58-
self.data += b"SZ" + len(bb).to_bytes(1, byteorder='little') + bb
60+
bb = self.metadata["filesize"].to_bytes(8, byteorder='big')
61+
self.data += b"FSZ" + len(bb).to_bytes(1, byteorder='big') + bb
5962
if "hash" in self.metadata:
6063
bb = self.metadata["hash"]
61-
self.data += b"HS" + len(bb).to_bytes(1, byteorder='little') + bb
64+
self.data += b"HSH" + len(bb).to_bytes(1, byteorder='big') + bb
6265

6366
data = self.data + b'\x1A' * (self.datasize - len(self.data))
64-
buffer = (self.uid.to_bytes(6, byteorder='little') +
65-
self.blocknum.to_bytes(4, byteorder='little') +
67+
buffer = (self.uid +
68+
self.blocknum.to_bytes(4, byteorder='big') +
6669
data)
67-
crc = binascii.crc_hqx(buffer,0).to_bytes(2,byteorder='little')
70+
crc = binascii.crc_hqx(buffer, self.ver).to_bytes(2,byteorder='big')
6871
return (self.magic + crc + buffer)
6972

7073
def decode(self, buffer):
@@ -79,13 +82,13 @@ def decode(self, buffer):
7982
print("Version:", buffer[3])
8083

8184
#check CRC of rest of the block
82-
crc = int.from_bytes(buffer[4:6], byteorder='little')
83-
if crc != binascii.crc_hqx(buffer[6:],0):
85+
crc = int.from_bytes(buffer[4:6], byteorder='big')
86+
if crc != binascii.crc_hqx(buffer[6:], self.ver):
8487
return False
8588
print("CRC: OK!")
8689

87-
self.uid = int.from_bytes(buffer[6:12], byteorder='little')
88-
self.blocknum = int.from_bytes(buffer[12:16], byteorder='little')
90+
self.uid = buffer[6:12]
91+
self.blocknum = int.from_bytes(buffer[12:16], byteorder='big')
8992
self.data = buffer[16:]
9093

9194
if self.blocknum == 0:

0 commit comments

Comments
 (0)