-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathtgcutter.py
90 lines (77 loc) · 2.86 KB
/
tgcutter.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
import json
import os
import sys
import time
import itertools
def spinner():
    """
    Animate a one-character text spinner on stdout.

    Redraws 'Processing <frame>' on the current terminal line (via a leading
    carriage return) roughly every 0.1 seconds, stepping through the frames
    '-', '\\', '|', '/' in order and wrapping around.

    The loop has no exit condition: the call blocks its calling thread
    until the process ends or an exception interrupts it.
    """
    frames = '-\\|/'
    position = 0
    while True:
        sys.stdout.write(f'\rProcessing {frames[position]}')
        # Flush explicitly: no newline is written, so buffered output
        # would otherwise not appear.
        sys.stdout.flush()
        time.sleep(0.1)
        position = (position + 1) % len(frames)
def extract_text_blocks(data):
    """
    Recursively collect text blocks from parsed JSON and gather statistics.

    The whole structure (nested dicts and lists) is walked depth-first in
    document order. A dict is treated as a message only when its 'type'
    value is 'message' or 'service'; dicts carrying any other 'type' value
    are still traversed but are not counted as messages.

    Args:
        data: Parsed JSON value (dict, list or scalar). Scalars are ignored.

    Returns:
        A ``(texts, stats)`` tuple where

        * ``texts`` is a list of strings of the form ``{ "<full_text>" }``,
          one per 'message' dict whose 'full_text' is a non-blank string
          (the text is embedded as-is, without stripping or escaping);
        * ``stats`` is a dict with the integer counters
          - 'total_messages':   dicts of type 'message' or 'service',
          - 'text_messages':    messages with a non-blank string 'full_text',
          - 'empty_messages':   messages whose 'full_text' key is present
                                but blank or not a string,
          - 'service_messages': dicts of type 'service',
          - 'total_characters': summed length of the stripped 'full_text'
                                of all text messages.

        A 'message' dict without a 'full_text' key counts towards
        'total_messages' only.
    """
    texts = []
    stats = {
        'total_messages': 0,
        'text_messages': 0,
        'empty_messages': 0,
        'service_messages': 0,
        'total_characters': 0
    }

    def recurse(obj):
        if isinstance(obj, dict):
            kind = obj.get('type')
            if kind == 'message':
                stats['total_messages'] += 1
                if 'full_text' in obj:
                    val = obj['full_text']
                    if isinstance(val, str) and val.strip():
                        texts.append(f'{{ "{val}" }}')
                        stats['text_messages'] += 1
                        stats['total_characters'] += len(val.strip())
                    else:
                        stats['empty_messages'] += 1
            elif kind == 'service':
                stats['total_messages'] += 1
                stats['service_messages'] += 1
            # Any other 'type' value is deliberately not counted, so that
            # non-message dicts do not inflate 'total_messages'.

            # Keep descending: messages may be nested at any depth.
            for value in obj.values():
                recurse(value)
        elif isinstance(obj, list):
            for item in obj:
                recurse(item)

    recurse(data)
    return texts, stats
def main():
    """
    Convert 'result.json' into 'output.txt' and print a statistics report.

    Reads 'result.json' from the current working directory, passes the
    parsed JSON to extract_text_blocks(), writes the returned text blocks
    to 'output.txt' joined by ',\\n', and prints the collected counters.

    A missing input file is reported and the function returns early.
    Invalid JSON and any other exception are reported on stdout rather
    than propagated. Whenever the input file exists, the closing
    'Processing completed' line and the elapsed time are printed, whether
    or not processing succeeded.
    """
    began = time.time()
    source_path = "result.json"
    target_path = "output.txt"

    # Nothing to do without the input file.
    if not os.path.exists(source_path):
        print(f"Error: '{source_path}' not found.")
        print("Processing completed")
        return

    # (label shown to the user, key in the statistics dict), in print order.
    report_rows = (
        ("Total messages", 'total_messages'),
        ("Text messages", 'text_messages'),
        ("Empty messages", 'empty_messages'),
        ("Service messages", 'service_messages'),
        ("Total characters in text messages", 'total_characters'),
    )

    try:
        with open(source_path, 'r', encoding='utf-8') as src:
            parsed = json.load(src)
        blocks, counters = extract_text_blocks(parsed)

        # Save extracted blocks
        with open(target_path, 'w', encoding='utf-8') as dst:
            dst.write(",\n".join(blocks))

        # Print statistics
        print("\nProcessing completed:")
        for label, key in report_rows:
            print(f"- {label}: {counters[key]}")
        print(f"- Output: '{target_path}'")
    except json.JSONDecodeError:
        print("Error: The JSON file is invalid or empty.")
    except Exception as exc:
        print(f"Error: {exc}")
    finally:
        print("Processing completed")
        elapsed = time.time() - began
        print(f"Time taken: {elapsed:.2f} seconds")
# Run the conversion only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()