-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathCode.py
80 lines (70 loc) · 3.17 KB
/
Code.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
#
# @file : Code.py
# @date : 13 April 2024
# @authors : Orel Adivi and Daniel Noor
#
import numpy as np
import pylcs
from abc import ABC, abstractmethod
from utils.LevenshteinDistance import levenshtein_deletion_distance
class Code(ABC):
"""A base class for code generators. Each code generator we wrote is derived from this class."""
@staticmethod
def _codeword_as_str(codeword: np.ndarray) -> str:
return ''.join([str(x) for x in codeword])
# save a new codeword and its corresponding value
def _insert_codeword(self, value: int, codeword: str) -> None:
assert 0 <= value < self.words
assert value not in self.mapping.keys()
assert codeword not in self.codewords
self.codewords.add(codeword)
self.mapping[value] = codeword
@abstractmethod
def __init__(self, length: int, words: int):
self.length = length
self.words = words
assert 1 <= words <= length
self.codewords = set()
self.mapping = {}
def encode(self, value: int) -> str:
assert 0 <= value < self.words
return self.mapping[value]
def decode(self, word: str) -> int:
assert len(word) <= self.length
distances = [levenshtein_deletion_distance(codeword, word) for codeword in self.mapping.values()]
return int(min(self.mapping.keys(), key=distances.__getitem__))
# compute max deletion distance of the code using DP
def max_deletions_old(self) -> int:
sorted_words = sorted(self.codewords, key=lambda x: x.count('1'))
max_lcs = 0
sorted_counts = [x.count('1') for x in sorted_words]
for i, w1 in enumerate(sorted_words):
for j, w2 in enumerate(sorted_words[0:i]):
if min(sorted_counts[i], sorted_counts[j]) \
+ min(self.length - sorted_counts[i], self.length - sorted_counts[j]) <= max_lcs:
continue
lcs = pylcs.lcs_sequence_length(w1, w2)
if lcs > max_lcs:
max_lcs = lcs
return self.length - max_lcs - 1
# improved version of the previous method, utilizing sorting to skip checking some pairs of words
def max_deletions(self) -> int:
sorted_words = sorted(self.codewords, key=lambda x: x.count('1'))
max_lcs = 0
sorted_counts = [x.count('1') for x in sorted_words]
for i, w1 in enumerate(sorted_words):
for j, w2 in enumerate(sorted_words[0:i]):
if min(sorted_counts[i], sorted_counts[j]) \
+ min(self.length - sorted_counts[i], self.length - sorted_counts[j]) <= max_lcs:
break
lcs = pylcs.lcs_sequence_length(w1, w2)
if lcs > max_lcs:
max_lcs = lcs
for j, w2 in enumerate(sorted_words[i - 1:-1:-1]):
if min(sorted_counts[i], sorted_counts[j]) \
+ min(self.length - sorted_counts[i], self.length - sorted_counts[j]) <= max_lcs:
break
lcs = pylcs.lcs_sequence_length(w1, w2)
if lcs > max_lcs:
max_lcs = lcs
return self.length - max_lcs - 1