Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
83 changes: 83 additions & 0 deletions compression.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,83 @@
import heapq

class Node:
    """A vertex of the binary code tree.

    Leaves carry a symbol in ``char``; internal nodes are created with
    ``char`` set to ``None`` and point at their children through ``left``
    and ``right``.
    """

    def __init__(self, char, freq):
        self.char, self.freq = char, freq
        # Children start out unset; the tree builder attaches them later.
        self.left = self.right = None

    def __lt__(self, other):
        # Ordering by weight alone is what lets heapq compare Node objects.
        return self.freq < other.freq

def calc_freq(data):
    """Count how many times each symbol occurs in ``data``.

    Returns:
        A dict mapping symbol -> occurrence count, keyed in order of first
        appearance.
    """
    counts = {}
    for symbol in data:
        counts[symbol] = counts.get(symbol, 0) + 1
    return counts

def build_tree(freq_dict):
    """Build the prefix-code tree for the symbols in ``freq_dict``.

    The two lightest nodes are repeatedly popped from a min-heap and merged
    under a new internal node (``char`` is ``None``, ``freq`` is the sum of
    the two children) until a single root remains.

    Args:
        freq_dict: Mapping of symbol -> occurrence count.

    Returns:
        The root ``Node`` of the tree, or ``None`` when ``freq_dict`` is
        empty.
    """
    heap = [Node(char, freq) for char, freq in freq_dict.items()]

    if not heap:
        # Nothing to encode; indexing heap[0] below would raise IndexError.
        return None

    if len(heap) == 1:
        # A lone leaf used as the root would be assigned the empty code, so
        # every input would compress to '' and could not be decoded. Hang
        # the leaf under an internal root so it gets the one-bit code '0'.
        only_leaf = heap[0]
        root = Node(None, only_leaf.freq)
        root.left = only_leaf
        return root

    heapq.heapify(heap)

    while len(heap) > 1:
        left_node = heapq.heappop(heap)
        right_node = heapq.heappop(heap)
        parent_node = Node(None, left_node.freq + right_node.freq)
        parent_node.left = left_node
        parent_node.right = right_node
        heapq.heappush(heap, parent_node)

    return heap[0]

def generate_codes(node, code, code_dict):
    """Fill ``code_dict`` with the bit string of every leaf below ``node``.

    ``code`` is the prefix accumulated so far; following a left edge appends
    '0' and following a right edge appends '1'. ``code_dict`` is mutated in
    place and nothing is returned.
    """
    # Explicit stack instead of recursion. The right child is pushed first
    # so that the left subtree is still the first one visited.
    pending = [(node, code)]
    while pending:
        current, prefix = pending.pop()
        if current is None:
            continue
        if current.char is not None:
            code_dict[current.char] = prefix
            continue
        pending.append((current.right, prefix + '1'))
        pending.append((current.left, prefix + '0'))

def compress(data, code_dict):
    """Encode ``data`` by concatenating the code of each of its symbols.

    Args:
        data: Iterable of symbols to encode.
        code_dict: Mapping of symbol -> code string.

    Returns:
        The codes of all symbols in ``data``, joined in order.

    Raises:
        KeyError: If a symbol of ``data`` has no entry in ``code_dict``.
    """
    # str.join assembles the result in a single pass; growing a string with
    # += inside a loop can degrade to quadratic time.
    return ''.join(code_dict[char] for char in data)

def decompress(compressed, tree):
    """Decode a bit string by walking ``tree`` from the root.

    A '0' follows the left child; any other character follows the right
    child. Each time a node with a symbol is reached, that symbol is emitted
    and the walk restarts at the root. Trailing bits that stop short of a
    leaf are ignored.

    Args:
        compressed: String of '0'/'1' characters.
        tree: Root node of the code tree; may be ``None`` only when
            ``compressed`` is empty.

    Returns:
        The decoded string.

    Raises:
        ValueError: If the bits step onto a missing child, i.e. they do not
            correspond to any code in ``tree``.
    """
    if tree is None:
        if compressed:
            raise ValueError("cannot decode a non-empty bit string without a tree")
        return ''

    # Collect symbols and join once instead of growing a string with +=.
    symbols = []
    current_node = tree

    for position, bit in enumerate(compressed):
        current_node = current_node.left if bit == '0' else current_node.right

        if current_node is None:
            # Report the offending bit rather than failing later with an
            # AttributeError on None.
            raise ValueError(
                "bit {!r} at position {} does not match any code in the tree".format(
                    bit, position
                )
            )

        if current_node.char is not None:
            symbols.append(current_node.char)
            current_node = tree

    return ''.join(symbols)

def shannon_fano_compression(data):
    """Compress ``data`` and return everything needed to decode it again.

    NOTE(review): despite the name, the tree comes from ``build_tree``,
    which merges the two least-frequent nodes bottom-up. That is Huffman's
    construction rather than Shannon-Fano's top-down split. The name is kept
    so existing callers keep working.

    Args:
        data: The symbols to compress (typically a ``str``).

    Returns:
        A ``(compressed, tree, code_dict)`` tuple: the encoded bit string,
        the root of the code tree and the symbol -> code mapping. Empty
        input yields ``('', None, {})``.
    """
    freq_dict = calc_freq(data)
    if not freq_dict:
        # No symbols means no tree can be built; return an empty result
        # instead of crashing inside the tree construction.
        return '', None, {}

    tree = build_tree(freq_dict)

    code_dict = {}
    generate_codes(tree, '', code_dict)

    return compress(data, code_dict), tree, code_dict

def shannon_fano_decompression(compressed, tree):
    """Decode ``compressed`` with ``tree`` by delegating to ``decompress``."""
    return decompress(compressed, tree)
72 changes: 72 additions & 0 deletions test_compression.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,72 @@
import heapq

class Node:
    """Element of the code tree: a symbol, its weight and two child links.

    Internal nodes are created with ``char`` set to ``None``; nodes created
    from the input carry the symbol itself.
    """

    def __init__(self, char, freq):
        # Both children stay unset until the node is merged under a parent.
        self.left = self.right = None
        self.freq = freq
        self.char = char

    def __lt__(self, other):
        # heapq needs "<" to order nodes; only the weight takes part.
        return self.freq < other.freq

def build_tree(data):
    """Build the prefix-code tree for the symbols occurring in ``data``.

    Symbol frequencies are counted, every symbol becomes a leaf, and the two
    lightest nodes are repeatedly merged under a new internal node
    (``char`` is ``None``) until one root is left.

    Args:
        data: The symbols to analyse (typically a ``str``).

    Returns:
        The root ``Node``, or ``None`` when ``data`` is empty.
    """
    # Single counting pass in first-appearance order: avoids one
    # data.count() scan per distinct symbol and makes tie-breaking in the
    # heap independent of set iteration order.
    freq = {}
    for char in data:
        freq[char] = freq.get(char, 0) + 1

    nodes = [Node(char, count) for char, count in freq.items()]

    if not nodes:
        # Nothing to encode; nodes[0] below would raise IndexError.
        return None

    if len(nodes) == 1:
        # A lone leaf as root would get the empty code and make the encoding
        # lossy. Put it under an internal root so it gets the code '0'.
        only_leaf = nodes[0]
        root = Node(None, only_leaf.freq)
        root.left = only_leaf
        return root

    heapq.heapify(nodes)

    while len(nodes) > 1:
        left = heapq.heappop(nodes)
        right = heapq.heappop(nodes)
        parent = Node(None, left.freq + right.freq)
        parent.left = left
        parent.right = right
        # Push the parent exactly once. Adding it with list.append as well
        # inserted it twice, so the heap never shrank and the loop never
        # terminated.
        heapq.heappush(nodes, parent)

    return nodes[0]

def generate_codes(node, code, codes):
    """Record in ``codes`` the bit string leading to each leaf under ``node``.

    ``code`` holds the bits taken to reach ``node``; a left edge contributes
    '0' and a right edge contributes '1'. ``codes`` is updated in place.
    """
    # Depth-first walk with an explicit work list; pushing the right branch
    # before the left one keeps the left-first visiting order.
    todo = [(node, code)]
    while todo:
        here, bits = todo.pop()
        if here is None:
            continue
        if here.char is None:
            todo.append((here.right, bits + '1'))
            todo.append((here.left, bits + '0'))
        else:
            codes[here.char] = bits

def compress(data):
    """Encode ``data`` and return the bit string together with its tree.

    Args:
        data: The symbols to compress (typically a ``str``).

    Returns:
        A ``(compressed, tree)`` tuple: the concatenated codes of all
        symbols and the root of the code tree needed for decoding. Empty
        input yields ``('', None)``.
    """
    if not data:
        # There are no symbols to build a tree from; return an empty result
        # instead of failing inside the tree construction.
        return '', None

    tree = build_tree(data)
    codes = {}
    generate_codes(tree, '', codes)

    compressed = ''.join(codes[char] for char in data)
    return compressed, tree

def decompress(compressed, tree):
    """Decode a bit string by repeatedly descending ``tree`` from its root.

    A '0' moves to the left child; any other character moves to the right
    child. Reaching a node that carries a symbol emits it and restarts the
    descent at the root. Trailing bits that end before a leaf are ignored.

    Args:
        compressed: String of '0'/'1' characters.
        tree: Root node of the code tree; may be ``None`` only when
            ``compressed`` is empty.

    Returns:
        The decoded string.

    Raises:
        ValueError: If the bits step onto a missing child, i.e. they were
            not produced with ``tree``.
    """
    if tree is None:
        if compressed:
            raise ValueError("cannot decode a non-empty bit string without a tree")
        return ''

    # Gather the symbols in a list and join once; += on a string inside the
    # loop can be quadratic.
    decoded = []
    node = tree

    for index, bit in enumerate(compressed):
        node = node.left if bit == '0' else node.right

        if node is None:
            # Fail with a clear message instead of an AttributeError on None.
            raise ValueError(
                "bit {!r} at position {} does not match any code in the tree".format(
                    bit, index
                )
            )

        if node.char is not None:
            decoded.append(node.char)
            node = tree

    return ''.join(decoded)

def main():
    """Round-trip a sample sentence and print both sizes and the decoded text."""
    sample = "This is a test for Shannon-Fano compression."
    encoded, code_tree = compress(sample)

    # len(sample) counts characters, while len(encoded) counts the '0'/'1'
    # characters of the bit string.
    original_size = len(sample)
    compressed_size = len(encoded)

    print("Original data size:", original_size)
    print("Compressed data size:", compressed_size)
    print("Decompressed data:", decompress(encoded, code_tree))


if __name__ == "__main__":
    main()