Skip to content

Commit

Permalink
Merge pull request #374 from Mahos-H/Huffman-compressor
Browse files Browse the repository at this point in the history
Soham Haldar| Huffman encoder( huffman.cpp, compressor.py, decompress…
  • Loading branch information
josharsh authored Mar 14, 2024
2 parents d850b3c + c88bd81 commit eed107e
Show file tree
Hide file tree
Showing 3 changed files with 170 additions and 0 deletions.
48 changes: 48 additions & 0 deletions Huffman compressor/compressor.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
import math
def compress(ifile, codes, ofile):
    """Encode the text of ``ifile`` with a code table and write the bit string to ``ofile``.

    Args:
        ifile: Path of the plain-text file to compress.
        codes: Path of the code table. Each line is ``<symbol> <code>``; the
            line is split at its LAST space so that a space symbol (a line
            starting with two spaces) is parsed correctly.
        ofile: Path that receives the concatenated codes as text.

    Only alphabetic and whitespace characters are considered; they are
    upper-cased before lookup, and characters without a table entry are
    skipped. After writing, the entropy comparison of input vs. output is
    printed via ``entropy``.
    """
    symbol_codes = {}
    with open(codes, 'r') as f:
        for line in f:
            last_space_index = line.rfind(' ')
            if last_space_index != -1:
                symbol = line[:last_space_index]
                code = line[last_space_index + 1:].strip()
                symbol_codes[symbol] = code

    with open(ifile, 'r') as f:
        itext = f.read().strip()

    # Collect the codes in a list and join once: repeated ``+=`` on a string
    # may copy the whole accumulated output for every input character.
    pieces = []
    for c in itext:
        if c.isalpha() or c.isspace():
            c = c.upper()
            if c in symbol_codes:
                pieces.append(symbol_codes[c])
    compressed = ''.join(pieces)

    with open(ofile, 'w') as f:
        f.write(compressed)
    entropy(itext, compressed)
def entropy(a,b):
    """Print the difference between the Shannon entropies of ``a`` and ``b``.

    Each argument is an iterable with a length (a string in this script).
    The entropy is computed in bits per symbol from the relative frequency
    of every distinct element; the printed value is entropy(a) - entropy(b).
    Nothing is returned.
    """
    def _bits_per_symbol(seq):
        # Tally occurrences in first-seen order.
        tally = {}
        for item in seq:
            tally[item] = tally.get(item, 0) + 1
        # Accumulate p * log2(1/p) for every distinct element.
        acc = 0
        for item in tally:
            acc += (tally[item]/len(seq))*math.log2((len(seq)/tally[item]))
        return acc

    source_bits = _bits_per_symbol(a)
    encoded_bits = _bits_per_symbol(b)
    print("The information gain from compression is: ",source_bits-encoded_bits)

# Interactive entry point: ask for the three file names in order, run the
# compressor, then report where the result was written.
ifile, codes, ofile = [
    input(prompt)
    for prompt in (
        "Enter your input filename: ",
        "Enter the filename where you want to save your codes: ",
        "Enter the filename where you want your compressed document saved: ",
    )
]
compress(ifile, codes, ofile)
print("Compressed code has been written to:", ofile)
27 changes: 27 additions & 0 deletions Huffman compressor/decompressor.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
def decompress(ifile, codes,ofile):
    """Decode a bit string back into text using a code table.

    Args:
        ifile: Path of the compressed file; only its first line is read.
        codes: Path of the code table. Each line is ``<symbol> <code>``,
            split at the LAST space so a space symbol is parsed correctly.
        ofile: Path that receives the decoded text.

    Characters are appended to a buffer until the buffer equals a known
    code; the matching symbol is emitted and the buffer is reset. Any
    trailing characters that never complete a code are dropped.
    """
    symbol_codes = {}
    with open(codes, 'r') as f:
        for line in f:
            last_space_index = line.rfind(' ')
            if last_space_index != -1:
                symbol = line[:last_space_index]
                code = line[last_space_index + 1:].strip()
                symbol_codes[code] = symbol

    with open(ifile, 'r') as f:
        compressed = f.readline()

    # Collect decoded symbols in a list and join once instead of growing a
    # string with ``+=`` for every symbol.
    decoded = []
    current = ''
    for bit in compressed:
        current += bit
        if current in symbol_codes:
            decoded.append(symbol_codes[current])
            current = ''
    decompressed = ''.join(decoded)

    with open(ofile, 'w') as f:
        f.write(decompressed)

# Interactive entry point: ask for the three file names in order, run the
# decompressor, then report where the result was written.
ifile, codes, ofile = [
    input(prompt)
    for prompt in (
        "Enter your input (compressed) filename: ",
        "Enter the filename where you have saved your codes: ",
        "Enter the filename where you want your decompressed document saved: ",
    )
]
decompress(ifile, codes,ofile)
print("Decompressed code has been written to:", ofile)

95 changes: 95 additions & 0 deletions Huffman compressor/huffman.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,95 @@
#include <cctype>
#include <fstream>
#include <iostream>
#include <queue>
#include <string>
#include <unordered_map>
#include <vector>
using namespace std;
// One vertex of the Huffman tree: a symbol, its frequency, and two child
// links that start out empty.
struct Node{
    char symbol;
    unsigned freq;
    Node *left = nullptr;
    Node *right = nullptr;

    // Creates a childless node carrying the given symbol and frequency.
    Node(char symbol, unsigned freq) : symbol(symbol), freq(freq) {}
};

struct comp{
bool operator()(Node *l, Node *r){
return l->freq > r->freq;
}
};

void printCodes(Node *root, string str, unordered_map<char, string> &huffmanCodes){
if (!root){
return;
}
printCodes(root->left, str + "0", huffmanCodes);
printCodes(root->right, str + "1", huffmanCodes);
if (!root->left && !root->right){
huffmanCodes[root->symbol] = str;
}
}
unordered_map<char, string> generateHuffmanCodes(istream &input) {
unordered_map<char, unsigned> freq;
string line;
while (getline(input, line)) {
for (char c : line) {
if (isalpha(c) || isspace(c)) {
freq[toupper(c)]++;
}
}
}

priority_queue<Node *, vector<Node *>, comp> pq;
for (const auto &pair : freq) {
pq.push(new Node(pair.first, pair.second));
}
while (pq.size() > 1) {
Node *left = pq.top();
pq.pop();
Node *right = pq.top();
pq.pop();

Node *mergedNode = new Node('$', left->freq + right->freq);
mergedNode->left = left;
mergedNode->right = right;

pq.push(mergedNode);
}
Node *root = pq.top();
unordered_map<char, string> huffmanCodes;
queue<pair<Node *, string>> q;
q.push({root, ""});
while (!q.empty()) {
Node *current = q.front().first;
string currentCode = q.front().second;
q.pop();
if (current->left) {
q.push({current->left, currentCode + "0"});
}
if (current->right) {
q.push({current->right, currentCode + "1"});
}
if (!current->left && !current->right) {
huffmanCodes[current->symbol] = currentCode;
}
}
delete root;
return huffmanCodes;
}
int main(){
string file;
cout<<"Enter file name: ";//Your input text file goes here
getline(cin, file);
ifstream input (file);
string ofile;
cout<<"Enter your output file name, where your codes will be saved: ";//Your output text file goes here
getline(cin, ofile);
ofstream outputFile(ofile);
unordered_map<char, string> huffmanCodes = generateHuffmanCodes(input);
for (const auto &pair : huffmanCodes) {
cout << pair.first << " " << pair.second << "\n";
outputFile << pair.first << " " << pair.second << "\n";
}
return 0;
}

0 comments on commit eed107e

Please sign in to comment.