Skip to content

Commit 25989d2

Browse files
Merge pull request #46 from UmbrellaMalware/refactoring
add Index dataclass
2 parents a25afcf + b05e6ee commit 25989d2

File tree

4 files changed

+289
-247
lines changed

4 files changed

+289
-247
lines changed

dictdatabase/dataclasses.py

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,3 +6,14 @@ class SearchResult:
66
start_byte: int
77
end_byte: int
88
found: bool
9+
10+
11+
@dataclasses.dataclass(frozen=True)
12+
class Index:
13+
key: str
14+
key_start: int
15+
key_end: int
16+
indent_level: int
17+
indent_with: str
18+
value_hash: str
19+
old_value_end: int

dictdatabase/index_manager.py

Lines changed: 20 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -1,28 +1,27 @@
11
import hashlib
22

33
from dictdatabase import utils
4+
from dictdatabase.dataclasses import Index
45

56

6-
class IndexManager:
7-
@staticmethod
8-
def create_index(all_file_bytes: bytes, key: str, start, end):
9-
"""
10-
It takes a JSON file, a key, and a start and end position, and returns a tuple of information about the key and its
11-
value
7+
def create_index(all_file_bytes: bytes, key: str, start, end) -> Index:
8+
"""
9+
It takes a JSON file, a key, and a start and end position, and returns an Index with information about the key and its
10+
value
1211
13-
Args:
14-
all_file_bytes (bytes): The entire file as a byte string.
15-
key (str): The key of the value we're indexing.
16-
start: the start of the value in the file
17-
end: the end of the value in the file
12+
Args:
13+
all_file_bytes (bytes): The entire file as a byte string.
14+
key (str): The key of the value we're indexing.
15+
start: the start of the value in the file
16+
end: the end of the value in the file
1817
19-
Returns:
20-
The key, start, end, indent_level, indent_with, value_hash, end
21-
"""
22-
key_start, key_end = utils.find_outermost_key_in_json_bytes(all_file_bytes, key)
23-
indent_level, indent_with = utils.detect_indentation_in_json_bytes(
24-
all_file_bytes, key_start
25-
)
26-
value_bytes = all_file_bytes[start:end]
27-
value_hash = hashlib.sha256(value_bytes).hexdigest()
28-
return key, start, end, indent_level, indent_with, value_hash, end
18+
Returns:
19+
An Index built from (key, start, end, indent_level, indent_with, value_hash, end)
20+
"""
21+
key_start, key_end = utils.find_outermost_key_in_json_bytes(all_file_bytes, key)
22+
indent_level, indent_with = utils.detect_indentation_in_json_bytes(
23+
all_file_bytes, key_start
24+
)
25+
value_bytes = all_file_bytes[start:end]
26+
value_hash = hashlib.sha256(value_bytes).hexdigest()
27+
return Index(key, start, end, indent_level, indent_with, value_hash, end)

dictdatabase/indexing.py

Lines changed: 63 additions & 58 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33
import orjson
44

55
from . import config
6+
from .dataclasses import Index
67

78

89
# Problem: Multiple read processes will concurrently read and write the same file
@@ -25,61 +26,65 @@
2526

2627

2728
class Indexer:
28-
"""
29-
The Indexer takes the name of a database file, and tries to load the .index file
30-
of the corresponding database file.
31-
32-
The name of the index file is the name of the database file, with the extension
33-
.index and all "/" replaced with "___"
34-
35-
The content of the index file is a json object, where the keys are keys inside
36-
the database json file, and the values are lists of 5 elements:
37-
- start_index: The index of the first byte of the value of the key in the database file
38-
- end_index: The index of the last byte of the value of the key in the database file
39-
- indent_level: The indent level of the key in the database file
40-
- indent_with: The indent string used.
41-
- value_hash: The hash of the value bytes
42-
"""
43-
44-
__slots__ = ("data", "path")
45-
46-
def __init__(self, db_name: str):
47-
# Make path of index file
48-
db_name = db_name.replace("/", "___")
49-
self.path = os.path.join(config.storage_directory, ".ddb", f"{db_name}.index")
50-
51-
os.makedirs(os.path.dirname(self.path), exist_ok=True)
52-
if not os.path.exists(self.path):
53-
self.data = {}
54-
return
55-
56-
try:
57-
with open(self.path, "rb") as f:
58-
self.data = orjson.loads(f.read())
59-
except orjson.JSONDecodeError:
60-
self.data = {}
61-
62-
63-
def get(self, key):
64-
"""
65-
Returns a list of 5 elements for a key if it exists, otherwise None
66-
Elements:[start_index, end_index, indent_level, indent_with, value_hash]
67-
"""
68-
return self.data.get(key, None)
69-
70-
71-
def write(self, key, start_index, end_index, indent_level, indent_with, value_hash, old_value_end):
72-
"""
73-
Write index information for a key to the index file
74-
"""
75-
76-
if self.data.get(key, None) is not None:
77-
delta = end_index - old_value_end
78-
for entry in self.data.values():
79-
if entry[0] > old_value_end:
80-
entry[0] += delta
81-
entry[1] += delta
82-
83-
self.data[key] = [start_index, end_index, indent_level, indent_with, value_hash]
84-
with open(self.path, "wb") as f:
85-
f.write(orjson.dumps(self.data))
29+
"""
30+
The Indexer takes the name of a database file, and tries to load the .index file
31+
of the corresponding database file.
32+
33+
The name of the index file is the name of the database file, with the extension
34+
.index and all "/" replaced with "___"
35+
36+
The content of the index file is a json object, where the keys are keys inside
37+
the database json file, and the values are lists of 5 elements:
38+
- start_index: The index of the first byte of the value of the key in the database file
39+
- end_index: The index of the last byte of the value of the key in the database file
40+
- indent_level: The indent level of the key in the database file
41+
- indent_with: The indent string used.
42+
- value_hash: The hash of the value bytes
43+
"""
44+
45+
__slots__ = ("data", "path")
46+
47+
def __init__(self, db_name: str):
48+
# Make path of index file
49+
db_name = db_name.replace("/", "___")
50+
self.path = os.path.join(config.storage_directory, ".ddb", f"{db_name}.index")
51+
52+
os.makedirs(os.path.dirname(self.path), exist_ok=True)
53+
if not os.path.exists(self.path):
54+
self.data = {}
55+
return
56+
57+
try:
58+
with open(self.path, "rb") as f:
59+
self.data = orjson.loads(f.read())
60+
except orjson.JSONDecodeError:
61+
self.data = {}
62+
63+
def get(self, key):
64+
"""
65+
Returns a list of 5 elements for a key if it exists, otherwise None
66+
Elements: [start_index, end_index, indent_level, indent_with, value_hash]
67+
"""
68+
return self.data.get(key, None)
69+
70+
def write(self, index: Index):
71+
"""
72+
Write index information for a key to the index file
73+
"""
74+
75+
if self.data.get(index.key, None) is not None:
76+
delta = index.key_end - index.old_value_end
77+
for entry in self.data.values():
78+
if entry[0] > index.old_value_end:
79+
entry[0] += delta
80+
entry[1] += delta
81+
82+
self.data[index.key] = [
83+
index.key_start,
84+
index.key_end,
85+
index.indent_level,
86+
index.indent_with,
87+
index.value_hash,
88+
]
89+
with open(self.path, "wb") as f:
90+
f.write(orjson.dumps(self.data))

0 commit comments

Comments
 (0)