-
Notifications
You must be signed in to change notification settings - Fork 0
/
file_splitter_v3.py
95 lines (71 loc) · 2.32 KB
/
file_splitter_v3.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
#!/usr/bin/env python
# -*- coding: utf-8 -*-
import os
import random
import numpy as np
import time
def random_arr(size):
    """Return ``size`` random 32-bit words as a NumPy ``uint32`` array.

    The values are drawn from NumPy's global ``np.random`` generator.

    Args:
        size: Number of words to generate (anything ``np.random.randint``
            accepts as its ``size`` argument).

    Returns:
        A ``uint32`` array of random values in ``[0, 0xFFFFFFFF]``.
    """
    # randint's upper bound is exclusive, so it has to be 2**32 (not
    # 0xFFFFFFFF) for the all-ones word to be a possible outcome.
    return np.random.randint(0, 1 << 32, size, dtype="uint32")
class Node:
    """Node of a binary tree whose leaves carry arrays of 32-bit words.

    Splitting a node that holds ``words`` hands each of its two children
    an array derived from a random mask and ``words XOR mask`` (with some
    positions exchanged between the two), then drops the node's own copy.
    """

    def __init__(self):
        self.parent = None    # set by add_child() on the parent
        self.children = []    # empty while this node is a leaf
        self.words = None     # word array held by this node, or None

    def add_child(self, child):
        """Append ``child`` to this node and point its ``parent`` back here."""
        self.children.append(child)
        child.parent = self

    def build(self, seed, n):
        """Grow the tree under this node until it has ``n`` leaves.

        Starting from this node as the only leaf, a pending leaf is picked
        pseudo-randomly and split, ``n - 1`` times. For ``n <= 1`` nothing
        is split and this node stays a leaf.

        Args:
            seed: Seed for the leaf-selection sequence; the same seed and
                ``n`` always produce the same tree shape.
            n: Desired number of leaves.
        """
        # A private generator gives the same pick sequence as seeding the
        # global ``random`` module, without clobbering global RNG state.
        rng = random.Random(seed)
        self.children = []
        pending = [self]
        for count in range(1, n):
            # ``pending`` holds exactly ``count`` leaves at this point.
            node = pending.pop(rng.randint(0, count - 1))
            node.split()
            pending += node.children

    def split(self):
        """Give this node two new children and distribute ``words`` to them.

        When ``words`` is ``None`` only the two children are created.
        Otherwise a random mask ``arr1`` and ``arr2 = words ^ arr1`` are
        computed, each odd-indexed entry of ``arr1`` is exchanged with the
        preceding even-indexed entry of ``arr2``, and the two arrays become
        the children's ``words``.
        """
        self.add_child(Node())
        self.add_child(Node())
        if self.words is None:
            return
        n = len(self.words)
        arr1 = random_arr(n)
        arr2 = self.words ^ arr1
        # Vectorised form of: for i in 1, 3, 5, ... < n, swap arr1[i] with
        # arr2[i - 1]. Both slices select exactly n // 2 elements.
        odd = slice(1, n, 2)
        even = slice(0, 2 * (n // 2), 2)
        held = arr1[odd].copy()
        arr1[odd] = arr2[even]
        arr2[even] = held
        self.children[0].words = arr1
        self.children[1].words = arr2
        self.words = None  # memory optimisation

    def get_leaves(self):
        """Return the leaves below (or at) this node, left to right.

        Returns:
            A list of childless nodes in depth-first order, children
            visited in the order they were added. A childless node returns
            ``[self]``.
        """
        # Explicit stack instead of recursion, so a very deep tree cannot
        # hit the interpreter's recursion limit.
        leaves = []
        stack = [self]
        while stack:
            node = stack.pop()
            if node.children:
                # Reversed so the first child is popped (visited) first.
                stack.extend(reversed(node.children))
            else:
                leaves.append(node)
        return leaves
def split(inpath, outpath, seed, n):
    """Split the file at ``inpath`` into share files under ``outpath``.

    The file is read whole, padded to a multiple of four bytes, loaded as
    big-endian 32-bit words into the root of a ``Node`` tree, and the tree
    is built with ``seed`` and ``n``. Each leaf's words are then written,
    big-endian, to ``<outpath>/<index>.dat`` in ``get_leaves()`` order.

    Args:
        inpath: Path of the file to split.
        outpath: Directory for the output files; created if missing.
        seed: Seed passed to ``Node.build`` (controls the tree shape).
        n: Number of leaves requested from ``Node.build``.
    """
    with open(inpath, "rb") as in_file:
        bytes_ = in_file.read()

    # Always pad with 1..4 bytes, each holding the pad length, so an input
    # that is already a multiple of four still gets a full 4-byte pad.
    to_add = 4 - (len(bytes_) % 4)
    bytes_ += bytes([to_add] * to_add)

    tree = Node()
    # frombuffer yields a read-only view of the bytes; copy to own the data.
    tree.words = np.frombuffer(bytes_, dtype=">u4").copy()
    tree.build(seed, n)

    print("Writing")
    os.makedirs(outpath, exist_ok=True)
    for i, node in enumerate(tree.get_leaves()):
        print(f"File {i}", end="\r")
        with open(os.path.join(outpath, f"{i}.dat"), "wb") as out_file:
            out_file.write(node.words.astype(">u4").tobytes())
    print("Finished")
if __name__ == "__main__":
    # Script entry point: split the hard-coded sample file into 32 parts
    # and report the wall-clock time taken.
    started = time.time()
    split(inpath="./meme.mp4", outpath="./meme_tree", seed=2378956, n=32)
    elapsed = time.time() - started
    print(f"{elapsed:.6f}s")