-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathtools.py
205 lines (161 loc) · 6.51 KB
/
tools.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
import copy
import json
import re
from collections import Counter
from pathlib import Path
from typing import Dict, List, Literal, Optional

import matplotlib.colors as mcolors
import numpy as np
class Conversation:
    """One exported chat: its name, its messages and a word tokenizer.

    The constructor expects a mapping with the keys ``'messages'`` and
    ``'name'``; a missing key raises ``KeyError``.
    """

    # Characters that are replaced by a space before the text is split
    # into words.
    PUNCTUATION = r""",!\.…/"'\(\)\*\?=–;:^—~«»"""
    CLEAN_REGEX = re.compile(f"[{PUNCTUATION}]")

    def __init__(self, chat: dict):
        self.messages = chat['messages']
        self.name = chat['name']
        # Predicate selecting the messages that contribute words; it
        # accepts every message unless it is replaced on the instance.
        self._word_filter = lambda msg: True

    def get_word_list(self) -> List[str]:
        """Return every word of the accepted messages, lower-cased, in order.

        Messages rejected by ``_word_filter`` and messages whose ``'text'``
        value is not a plain string are skipped.
        """
        tokens: List[str] = []
        for message in self.messages:
            if not self._word_filter(message):
                continue
            body = message.get('text', '')
            if isinstance(body, str):
                normalized = self.CLEAN_REGEX.sub(' ', body.lower())
                # Splitting on runs of whitespace never yields empty tokens.
                tokens += normalized.split()
        return tokens

    def count_words(self) -> Counter:
        """Return a Counter mapping each word to its number of occurrences."""
        tally = Counter()
        tally.update(self.get_word_list())
        return tally
# Shape names accepted by create_mask's ``shape`` parameter.
ShapeType = Literal["circle", "rect"]
def get_chat_list(filename: str) -> List[dict]:
    """Read a UTF-8 JSON file and return the chats it contains.

    If the top-level document has a ``'chats'`` key, the value stored under
    ``['chats']['list']`` is returned. Otherwise the whole document is
    treated as a single chat and returned wrapped in a one-element list.
    """
    with open(filename, encoding='utf-8') as handle:
        payload = json.load(handle)
    if 'chats' not in payload:
        return [payload]
    return payload['chats']['list']
def find_chat_by_name(name: str, chats: list) -> Optional[dict]:
    """Return the first chat whose ``'name'`` equals *name*, else ``None``.

    The comparison is exact (case sensitive); chats without a ``'name'``
    key never match a string name.
    """
    for chat in chats:
        if chat.get('name') == name:
            return chat
    return None
def get_contrast_color(background_color: str) -> str:
    """Return black or white, whichever contrasts with the background.

    Returns ``"#000000"`` when the background's luma is above 0.5 and
    ``"#ffffff"`` otherwise. A color that matplotlib rejects with
    ``ValueError`` is treated as black, so white is returned for it.
    """
    try:
        channels = mcolors.to_rgb(background_color)
    except ValueError:
        # Unparseable color: behave as if the background were black.
        channels = (0, 0, 0)
    red, green, blue = channels[0], channels[1], channels[2]
    # Weighted sum of the RGB channels using the ITU-R BT.601 luma weights.
    luma = 0.299 * red + 0.587 * green + 0.114 * blue
    if luma > 0.5:
        return "#000000"
    return "#ffffff"
def create_mask(shape: ShapeType, width: int, height: int) -> Optional[np.ndarray]:
    """Create a mask array for a word cloud.

    Args:
        shape: ``"circle"`` builds a circular mask; any other value means
            no mask is needed.
        width: Canvas width in pixels (number of mask columns).
        height: Canvas height in pixels (number of mask rows).

    Returns:
        For ``"circle"``, an integer array of shape ``(height, width)``
        holding 255 outside the circle and 0 inside it (the boundary counts
        as inside). The circle is centered on the canvas and its radius is
        half the shorter side minus a 10-pixel margin. For any other shape,
        ``None``.
    """
    if shape != "circle":
        return None

    rows, cols = np.ogrid[:height, :width]
    center_x, center_y = width // 2, height // 2
    # Clamp at zero: on canvases smaller than 20 pixels the margin would make
    # the radius negative, and squaring it below would silently turn it back
    # into a positive radius that ignores the margin.
    radius = max(min(width, height) // 2 - 10, 0)
    outside = (cols - center_x) ** 2 + (rows - center_y) ** 2 > radius ** 2
    return 255 * outside.astype(int)
def calculate_frequencies(selected_conv: Conversation,
                          other_chats: list,
                          excluded_words: set[str]) -> Dict[str, float]:
    """Score the words of the selected chat, optionally against other chats.

    Words listed in *excluded_words* are ignored everywhere.

    * With no other chats, each word maps to its share of the selected
      chat's words.
    * Otherwise each word maps to its share in the selected chat divided by
      its share in the pooled other chats. A word missing from the other
      chats is divided by ``1e-10`` instead, and a word that only occurs in
      the other chats scores ``0.0``.
    """
    def keep_allowed(counts):
        """Return the word -> count pairs whose word is not excluded."""
        return {w: n for w, n in counts.items() if w not in excluded_words}

    # Pool the counts of every comparison chat.
    pooled = Counter()
    for chat in other_chats:
        pooled.update(keep_allowed(Conversation(chat).count_words()))
    pooled_total = sum(pooled.values())

    # Share of each word in the pooled chats; stays empty if they have no words.
    baseline = {}
    if pooled_total:
        baseline = {word: n / pooled_total for word, n in pooled.items()}

    own = keep_allowed(selected_conv.count_words())
    # An empty selected chat would otherwise divide by zero.
    own_total = sum(own.values()) or 1

    if not other_chats:
        return {word: n / own_total for word, n in own.items()}

    ratios = {}
    for word in set(own) | set(baseline):
        share = own.get(word, 0) / own_total
        ratios[word] = share / baseline.get(word, 1e-10)
    return ratios
def generate_unique_filename(path: str) -> str:
    """Return *path*, or a numbered variant of it that does not exist yet.

    If nothing exists at *path* it is returned as is. Otherwise ``(1)``,
    ``(2)``, ... is inserted between the file stem and its suffix until a
    name is found that does not exist in the same directory.
    """
    base = Path(path)
    candidate = base
    attempt = 0
    while candidate.exists():
        attempt += 1
        candidate = candidate.with_name(f"{base.stem}({attempt}){base.suffix}")
    return str(candidate)
class ColorConfig:
    """Text color palettes, loaded from a JSON file or taken from defaults.

    The active configuration is stored in ``self.config`` and has the form
    ``{"color_palettes": {"light_background": [...], "dark_background": [...]}}``.
    """

    DEFAULT_CONFIG = {
        "color_palettes": {
            "light_background": ["#0A0A0A", "#2D4261", "#4A1D32", "#1D4A3F", "#3D2B56"],
            "dark_background": ["#FAFAFA", "#D6E4F0", "#F5D6E4", "#D6F5E4", "#E4D6F5"],
        }
    }

    def __init__(self, config_path: Optional[str] = None):
        """Load the configuration.

        Args:
            config_path: Path of a JSON config file. When it is empty or the
                file does not exist, the defaults are used silently.

        Raises:
            ValueError: The file was parsed but does not contain both
                required palettes.

        A file that cannot be read or is not valid JSON is reported with
        ``print`` and the defaults are used instead.
        """
        # Work on a private copy so that changing one instance's config can
        # never alter DEFAULT_CONFIG or other instances.
        self.config = copy.deepcopy(self.DEFAULT_CONFIG)
        if not config_path or not Path(config_path).exists():
            return

        try:
            with open(config_path, encoding='utf-8') as f:
                loaded = json.load(f)
        except (OSError, ValueError) as e:
            # ValueError covers json.JSONDecodeError and UnicodeDecodeError.
            print(f"Error loading config: {e}. Using default colors")
            return

        self.config = loaded
        self._validate()

    def _validate(self):
        """Raise ValueError unless both required palettes are present."""
        required = ("light_background", "dark_background")
        palettes = None
        if isinstance(self.config, dict):
            palettes = self.config.get("color_palettes")
        # A missing or non-mapping "color_palettes" entry is reported the same
        # way as a missing palette instead of leaking KeyError/TypeError.
        if not isinstance(palettes, dict) or any(key not in palettes for key in required):
            raise ValueError("Invalid color palettes in config")
class ContrastColorFunc:
    """Color function returning text colors that contrast with a background.

    Instances are callable as ``func(word, **kwargs)`` and return one color
    from the palette that matches the background: the ``"light_background"``
    palette when the background luminance is above 0.5, otherwise the
    ``"dark_background"`` palette.
    """

    def __init__(self, bg_color: str, config: ColorConfig):
        """Select the palette for *bg_color* from *config*.

        Args:
            bg_color: Background color in any form matplotlib can parse.
            config: Provides ``config.config["color_palettes"]``.
        """
        self.bg_luminance = self.calculate_luminance(bg_color)
        self.palette_type = (
            "light_background" if self.bg_luminance > 0.5 else "dark_background"
        )
        self.colors = config.config["color_palettes"][self.palette_type]

    @staticmethod
    def calculate_luminance(color: str) -> float:
        """Return the luminance of *color* in the range 0..1.

        A color that matplotlib rejects with ``ValueError`` yields ``0.0``,
        i.e. it is treated like black.
        """
        try:
            rgb = mcolors.to_rgb(color)
        except ValueError:
            return 0.0
        # Weighted channel sum using the ITU-R BT.601 luma weights.
        return 0.299 * rgb[0] + 0.587 * rgb[1] + 0.114 * rgb[2]

    def generate_palette(self):
        """Return the built-in contrasting palette for the background.

        This list is independent of the loaded configuration: dark colors
        for a light background, light colors for a dark one.
        """
        if self.bg_luminance > 0.5:
            # Dark colors for light background
            return [
                '#0A0A0A', '#2D4261', '#4A1D32',
                '#1D4A3F', '#3D2B56', '#2B5647'
            ]
        # Light colors for dark background
        return [
            '#FAFAFA', '#D6E4F0', '#F5D6E4',
            '#D6F5E4', '#E4D6F5', '#E4F5D6'
        ]

    def __call__(self, word, **kwargs):
        """Return a randomly chosen palette color for *word* as a plain ``str``.

        When the caller supplies a ``random_state`` keyword (wordcloud passes
        its ``random.Random`` instance this way), that generator is used so
        that a seeded word cloud gets reproducible colors. Without it, the
        global NumPy generator is used.
        """
        rng = kwargs.get("random_state")
        if rng is None:
            rng = np.random
        # str() normalizes the numpy.str_ returned by NumPy's choice().
        return str(rng.choice(self.colors))