Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 7 additions & 0 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -77,6 +77,10 @@ add_executable(NeuralNetwork tests/neural_network/NeuralNetworkTest.cpp)
target_compile_definitions(NeuralNetwork PRIVATE TEST_NEURAL_NETWORK)
target_link_libraries(NeuralNetwork cpp_ml_library)

# Eclat test executable: builds tests/association/EclatTest.cpp with
# TEST_ECLAT defined and links it against cpp_ml_library.
add_executable(Eclat tests/association/EclatTest.cpp)
target_compile_definitions(Eclat PRIVATE TEST_ECLAT)
target_link_libraries(Eclat cpp_ml_library)

# Register individual tests
add_test(NAME LogisticRegressionTest COMMAND LogisticRegressionTest)
add_test(NAME PolynomialRegressionTest COMMAND PolynomialRegressionTest)
Expand All @@ -91,6 +95,7 @@ add_test(NAME KNNRegressor COMMAND KNNRegressor)
add_test(NAME HierarchicalClustering COMMAND HierarchicalClustering)
add_test(NAME SupportVectorRegression COMMAND SupportVectorRegression)
add_test(NAME NeuralNetwork COMMAND NeuralNetwork)
add_test(NAME Eclat COMMAND Eclat)


# Add example executables if BUILD_EXAMPLES is ON
Expand Down Expand Up @@ -130,6 +135,8 @@ if(BUILD_EXAMPLES)
target_compile_definitions(${EXAMPLE_TARGET} PRIVATE TEST_SUPPORT_VECTOR_REGRESSION)
elseif(EXAMPLE_NAME STREQUAL "NeuralNetworkExample")
target_compile_definitions(${EXAMPLE_TARGET} PRIVATE TEST_NEURAL_NETWORK)
elseif(EXAMPLE_NAME STREQUAL "EclatExample")
target_compile_definitions(${EXAMPLE_TARGET} PRIVATE TEST_ECLAT)
endif()
endforeach()
endif()
44 changes: 44 additions & 0 deletions examples/EclatExample.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
#include "../ml_library_include/ml/association/Eclat.hpp"
#include <iostream>

/**
 * @brief Mines a small hard-coded transaction set with Eclat and prints
 *        every frequent itemset together with its support count.
 */
void testEclat() {
    // Nine toy transactions over the items 1..5.
    const std::vector<std::vector<int>> transactions = {
        {1, 2, 5},
        {2, 4},
        {2, 3},
        {1, 2, 4},
        {1, 3},
        {2, 3},
        {1, 3},
        {1, 2, 3, 5},
        {1, 2, 3}
    };

    // An itemset is frequent when it occurs in at least 22% of the transactions.
    Eclat eclat(0.22);

    // Mine first, then fetch the per-itemset support counts gathered by the run.
    const auto frequent_itemsets = eclat.run(transactions);
    const auto support_counts = eclat.get_support_counts();

    std::cout << "Frequent Itemsets:\n";
    for (const std::vector<int>& itemset : frequent_itemsets) {
        std::cout << "Itemset: { ";
        for (auto it = itemset.begin(); it != itemset.end(); ++it) {
            std::cout << *it << " ";
        }
        std::cout << "} - Support: " << support_counts.at(itemset) << "\n";
    }
}

/// Program entry point: runs the Eclat demonstration and exits successfully.
int main() {
    testEclat();
}
159 changes: 159 additions & 0 deletions ml_library_include/ml/association/Eclat.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,159 @@
#ifndef ECLAT_HPP
#define ECLAT_HPP

#include <algorithm>
#include <cmath>
#include <iostream>
#include <iterator>
#include <map>
#include <stdexcept>
#include <string>
#include <vector>

/**
* @file Eclat.hpp
* @brief Optimized Implementation of the Eclat algorithm for frequent itemset mining.
*/

/**
 * @class Eclat
 * @brief Frequent itemset mining using the Eclat algorithm.
 *
 * Each item is mapped to the sorted list of transaction IDs (TIDs) that
 * contain it. The support of an itemset is the size of the intersection of
 * its items' TID lists, and itemsets are grown depth-first by intersecting
 * the TID lists of items that share a common prefix.
 *
 * All member functions are defined `inline` because this is a header-only
 * class: without it, including the header from two translation units
 * produces multiple-definition link errors.
 */
class Eclat {
public:
    /**
     * @brief Constructor for the Eclat class.
     * @param min_support Minimum support threshold as a fraction in (0, 1].
     * @throws std::invalid_argument if min_support is outside (0, 1] or NaN.
     */
    Eclat(double min_support);

    /**
     * @brief Runs the Eclat algorithm on the provided dataset.
     *
     * Results of any previous call are discarded first. An item repeated
     * inside a single transaction is counted once for that transaction.
     *
     * @param transactions A vector of transactions, each transaction is a vector of items.
     * @return The frequent itemsets in lexicographic order; the items of each
     *         itemset are in ascending order.
     */
    std::vector<std::vector<int>> run(const std::vector<std::vector<int>>& transactions);

    /**
     * @brief Gets the support counts for all frequent itemsets found by the last run().
     * @return A copy of the map from itemset (ascending item vector) to the
     *         number of transactions that contain it.
     */
    std::map<std::vector<int>, int> get_support_counts() const;

private:
    /**
     * @brief Recursively mines frequent itemsets sharing a common prefix.
     * @param prefix The current itemset prefix (empty at the top level).
     * @param items Items, in ascending order, that are frequent together with the prefix.
     * @param tid_sets For each entry of @p items, the sorted TIDs of the
     *        transactions containing prefix + item.
     */
    void eclat_recursive(const std::vector<int>& prefix,
                         const std::vector<int>& items,
                         const std::map<int, std::vector<int>>& tid_sets);

    double min_support;                              ///< Minimum support threshold (fraction).
    int min_support_count;                           ///< Minimum support as an absolute transaction count.
    int total_transactions;                          ///< Number of transactions in the last run.
    std::map<std::vector<int>, int> support_counts;  ///< Support counts of the frequent itemsets.
};

inline Eclat::Eclat(double min_support)
    : min_support(min_support), min_support_count(0), total_transactions(0) {
    // Written as a negated conjunction so that NaN is rejected as well.
    if (!(min_support > 0.0 && min_support <= 1.0)) {
        throw std::invalid_argument("min_support must be between 0 and 1.");
    }
}

inline std::vector<std::vector<int>> Eclat::run(const std::vector<std::vector<int>>& transactions) {
    // Start from a clean slate so repeated runs do not leak earlier results.
    support_counts.clear();
    total_transactions = static_cast<int>(transactions.size());

    // min_support * N can land a hair above the exact integer (for example
    // 0.07 * 100 == 7.000000000000001), which a bare ceil() would round up to
    // the next count. A small tolerance keeps threshold itemsets frequent.
    constexpr double kRoundingTolerance = 1e-9;
    min_support_count =
        static_cast<int>(std::ceil(min_support * total_transactions - kRoundingTolerance));
    if (total_transactions > 0) {
        // A positive threshold always requires at least one occurrence.
        min_support_count = std::max(min_support_count, 1);
    }

    // Build the vertical layout: item -> TIDs of the transactions containing it.
    // TIDs are appended in increasing order, so every list is already sorted
    // and a duplicate item within one transaction shows up as a repeated tail.
    std::map<int, std::vector<int>> item_tidsets;
    for (int tid = 0; tid < total_transactions; ++tid) {
        for (int item : transactions[tid]) {
            std::vector<int>& tids = item_tidsets[item];
            if (tids.empty() || tids.back() != tid) {
                tids.push_back(tid);
            }
        }
    }

    // Keep only the frequent single items; map iteration yields ascending order.
    std::vector<int> frequent_items;
    for (const auto& [item, tids] : item_tidsets) {
        if (static_cast<int>(tids.size()) >= min_support_count) {
            frequent_items.push_back(item);
        }
    }

    // The recursion records every frequent itemset, single items included.
    eclat_recursive({}, frequent_items, item_tidsets);

    // Everything recorded met the threshold, so the keys are the result.
    std::vector<std::vector<int>> frequent_itemsets;
    frequent_itemsets.reserve(support_counts.size());
    for (const auto& entry : support_counts) {
        frequent_itemsets.push_back(entry.first);
    }
    return frequent_itemsets;
}

inline void Eclat::eclat_recursive(const std::vector<int>& prefix,
                                   const std::vector<int>& items,
                                   const std::map<int, std::vector<int>>& tid_sets) {
    const std::size_t item_count = items.size();
    for (std::size_t i = 0; i < item_count; ++i) {
        const int item = items[i];
        // Looked up once per item instead of once per candidate pair.
        const std::vector<int>& item_tids = tid_sets.at(item);

        // prefix + item is frequent by construction; record its support.
        std::vector<int> new_prefix = prefix;
        new_prefix.push_back(item);
        support_counts[new_prefix] = static_cast<int>(item_tids.size());

        // Try to extend the new prefix with each later item.
        std::vector<int> remaining_items;
        std::map<int, std::vector<int>> new_tid_sets;
        for (std::size_t j = i + 1; j < item_count; ++j) {
            const int next_item = items[j];
            const std::vector<int>& next_tids = tid_sets.at(next_item);

            // Both lists are sorted, so their intersection is the TID list
            // of new_prefix + next_item.
            std::vector<int> shared_tids;
            std::set_intersection(item_tids.begin(), item_tids.end(),
                                  next_tids.begin(), next_tids.end(),
                                  std::back_inserter(shared_tids));

            if (static_cast<int>(shared_tids.size()) >= min_support_count) {
                remaining_items.push_back(next_item);
                new_tid_sets[next_item] = std::move(shared_tids);
            }
        }

        if (!remaining_items.empty()) {
            eclat_recursive(new_prefix, remaining_items, new_tid_sets);
        }
    }
}

inline std::map<std::vector<int>, int> Eclat::get_support_counts() const {
    return support_counts;
}

#endif // ECLAT_HPP
63 changes: 63 additions & 0 deletions tests/association/EclatTest.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,63 @@
#include "../../ml_library_include/ml/association/Eclat.hpp"
#include <iostream>
#include <vector>
#include <cassert>
#include <string>

/**
 * @brief Basic regression test for the Eclat frequent itemset miner.
 *
 * Mines a fixed nine-transaction dataset at 22% minimum support and checks
 * the complete set of frequent itemsets and their support counts. Failures
 * are reported on stderr and through a non-zero exit code rather than
 * assert(), so the test still checks something when NDEBUG is defined.
 *
 * @return 0 when every check passes, 1 otherwise.
 */
int main() {
    // Sample dataset with transactions
    const std::vector<std::vector<int>> transactions = {
        {1, 2, 5},
        {2, 4},
        {2, 3},
        {1, 2, 4},
        {1, 3},
        {2, 3},
        {1, 3},
        {1, 2, 3, 5},
        {1, 2, 3}
    };

    // Minimum support threshold: 22% of 9 transactions, i.e. at least 2 occurrences.
    const double min_support = 0.22;

    Eclat eclat(min_support);
    const std::vector<std::vector<int>> frequent_itemsets = eclat.run(transactions);
    const auto support_counts = eclat.get_support_counts();

    // Every itemset occurring in at least 2 of the 9 transactions, with the
    // number of transactions that contain it.
    const std::map<std::vector<int>, int> expected_support = {
        {{1}, 6}, {{2}, 7}, {{3}, 6}, {{4}, 2}, {{5}, 2},
        {{1, 2}, 4}, {{1, 3}, 4}, {{1, 5}, 2},
        {{2, 3}, 4}, {{2, 4}, 2}, {{2, 5}, 2},
        {{1, 2, 3}, 2}, {{1, 2, 5}, 2}
    };

    bool all_passed = true;

    // Verify that each expected itemset appears in the results with the right support.
    for (const auto& [expected_set, expected_count] : expected_support) {
        const bool reported =
            std::find(frequent_itemsets.begin(), frequent_itemsets.end(), expected_set) !=
            frequent_itemsets.end();
        const auto found = support_counts.find(expected_set);
        if (!reported || found == support_counts.end()) {
            std::cerr << "Expected frequent itemset missing from results.\n";
            all_passed = false;
        } else if (found->second != expected_count) {
            std::cerr << "Frequent itemset has an unexpected support count.\n";
            all_passed = false;
        }
    }

    // Nothing beyond the expected itemsets may be reported.
    if (frequent_itemsets.size() != expected_support.size()) {
        std::cerr << "Unexpected number of frequent itemsets.\n";
        all_passed = false;
    }

    // Display the results for verification
    std::cout << "Frequent Itemsets:\n";
    for (const auto& itemset : frequent_itemsets) {
        const auto found = support_counts.find(itemset);
        if (found == support_counts.end()) {
            std::cerr << "Frequent itemset has no recorded support count.\n";
            all_passed = false;
            continue;
        }

        std::cout << "Itemset: { ";
        for (int item : itemset) {
            std::cout << item << " ";
        }
        std::cout << "} - Support: " << found->second << "\n";

        // Verify support is above the minimum support threshold
        const double support_ratio = static_cast<double>(found->second) / transactions.size();
        if (!(support_ratio >= min_support)) {
            std::cerr << "Frequent itemset does not meet minimum support threshold.\n";
            all_passed = false;
        }
    }

    if (!all_passed) {
        return 1;
    }

    // Inform user of successful test
    std::cout << "Eclat Association Rule Mining Basic Test passed." << std::endl;

    return 0;
}
Loading