forked from SomeOddCodeGuy/WilmerAI
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathfile_utils.py
137 lines (107 loc) · 4.93 KB
/
file_utils.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
import json
import logging
import os
from pathlib import Path
from typing import Dict
# Module-level logger named after this module; load_timestamp_file uses it to report file access.
logger = logging.getLogger(__name__)
def ensure_json_file_exists(filepath, initial_data=None):
    """Ensure that the JSON file exists and return its contents.

    If the file does not exist, it is created (together with any missing
    parent directories) containing ``initial_data`` when that is provided,
    or an empty JSON list otherwise. In that case the seeded value itself is
    returned without re-reading the file. If the file already exists, its
    parsed contents are returned.

    Args:
        filepath (str): The path to the JSON file.
        initial_data (list, optional): The initial data to write to the file if it does not exist.

    Returns:
        list: The contents of the JSON file, or the provided initial_data
        (an empty list when it is None) if the file was created.

    Raises:
        json.JSONDecodeError: If an existing file does not contain valid JSON.
        TypeError: If initial_data cannot be serialized to JSON.
    """
    if not os.path.exists(filepath):
        seed = initial_data if initial_data is not None else []
        # Serialize before touching the disk so that a non-serializable
        # payload cannot leave a half-written file behind.
        payload = json.dumps(seed, indent=4)
        parent_dir = os.path.dirname(filepath)
        if parent_dir:
            # A missing parent directory would otherwise make open() fail.
            os.makedirs(parent_dir, exist_ok=True)
        with open(filepath, 'w', encoding='utf-8') as file:
            file.write(payload)
        return seed

    with open(filepath, encoding='utf-8') as file:
        return json.load(file)
def read_chunks_with_hashes(filepath):
    """Load the stored chunks and return them as (text, hash) pairs.

    The file is obtained through ensure_json_file_exists, so a missing file
    is created first and yields an empty result.

    Args:
        filepath (str): The path to the JSON file containing chunks with hashes.

    Returns:
        list: One ``(text_block, hash)`` tuple per entry in the file, in file order.
    """
    pairs = []
    for entry in ensure_json_file_exists(filepath):
        # Each stored entry is a mapping with 'text_block' and 'hash' keys.
        pairs.append((entry['text_block'], entry['hash']))
    return pairs
def write_chunks_with_hashes(chunks_with_hashes, filepath, overwrite=False):
    """Write chunks with hashes to a JSON file, optionally overwriting existing content.

    Each ``(text_block, hash)`` tuple is stored as a mapping with the keys
    ``'text_block'`` and ``'hash'``. When appending, the existing entries are
    loaded via ensure_json_file_exists and the new ones are added after them.

    Args:
        chunks_with_hashes (list): A list of tuples, where each tuple contains a text block and a hash.
        filepath (str): The path to the JSON file where chunks with hashes will be written.
        overwrite (bool): If True, overwrite the existing file content; otherwise, append to it.

    Raises:
        json.JSONDecodeError: When appending and the existing file is not valid JSON.
        TypeError: If the resulting data cannot be serialized to JSON.
    """
    new_data = [{'text_block': text_block, 'hash': hash_code} for text_block, hash_code in chunks_with_hashes]
    if overwrite:
        # The old content is discarded anyway, so do not read or parse it:
        # a corrupt file must not block the overwrite that replaces it.
        combined_data = new_data
    else:
        combined_data = ensure_json_file_exists(filepath) + new_data

    # Serialize first so that a failure cannot truncate the existing file.
    payload = json.dumps(combined_data, indent=4)
    with open(filepath, 'w', encoding='utf-8') as file:
        file.write(payload)
def update_chunks_with_hashes(chunks_with_hashes, filepath, mode='append'):
    """Store chunks with hashes in a JSON file, selecting the write strategy by name.

    This is a thin wrapper around write_chunks_with_hashes that translates
    the textual mode into its ``overwrite`` flag.

    Args:
        chunks_with_hashes (list): A list of tuples, where each tuple contains a text block and a hash.
        filepath (str): The path to the JSON file where chunks with hashes will be updated.
        mode (str): 'overwrite' replaces the existing data; any other value
            (including the default 'append') adds the new chunks to it.
    """
    replace_existing = mode == 'overwrite'
    write_chunks_with_hashes(chunks_with_hashes, filepath, overwrite=replace_existing)
def get_logger_filename():
    """Build the path of the Wilmer log file relative to this module.

    Returns:
        str: ``logs/wilmerai.log`` inside the directory two levels above the
        directory that contains this module.
    """
    location = os.path.abspath(__file__)
    # Climb from this file to its directory, then two more levels up.
    for _ in range(3):
        location = os.path.dirname(location)
    return os.path.join(location, "logs", 'wilmerai.log')
def load_timestamp_file(filepath: str) -> Dict[str, str]:
    """Return the parsed JSON content of the timestamp file.

    A missing file is not an error: a warning is logged and an empty
    dictionary is returned instead.

    Args:
        filepath (str): The path to the timestamp file.

    Returns:
        Dict[str, str]: The decoded file content, or ``{}`` when the file is absent.
    """
    # Guard clause: handle the missing-file case first.
    if not os.path.exists(filepath):
        logger.warning(f"File does not exist: {filepath}")
        return {}

    logger.debug(f"File exists: {filepath}")
    with open(filepath, 'r') as handle:
        logger.info(f"Opening file: {filepath}")
        timestamps = json.load(handle)
    return timestamps
def save_timestamp_file(filepath: str, timestamps: Dict[str, str]) -> None:
    """Save the timestamp data to the given file as indented JSON.

    Args:
        filepath (str): The path of the file to write; existing content is replaced.
        timestamps (Dict[str, str]): The timestamp mapping to store.

    Raises:
        TypeError: If timestamps contains a value that cannot be serialized
            to JSON. The existing file is left untouched in that case.
    """
    # Serialize before opening: opening with 'w' truncates the file, so a
    # serialization failure afterwards would destroy the previous timestamps.
    payload = json.dumps(timestamps, indent=4)
    with open(filepath, 'w', encoding='utf-8') as file:
        file.write(payload)
def load_custom_file(filepath: str, delimiter: str | None, custom_delimiter: str | None):
"""
Load a custom file that contains simple text.
Args:
filepath (str): The path to the file to load
delimiter (str): The delimiter to use when reading the file.
custom_delimiter (str): The delimiter to replace the file delimiter with when returning the contents of the file
Returns:
The contents of the file, separated by the custom_delimiter if applicable.
"""
path = Path(filepath)
if path.exists():
with path.open('r') as f:
content = f.read()
if not content:
return "No additional information added"
if delimiter is not None and custom_delimiter is not None:
content = content.replace(delimiter, custom_delimiter)
return content
else:
return "Custom instruction file did not exist"