Algebraic-Programming · tonibohnlein · Oct 31, 2025 · Oct 31, 2025 · Oct 31, 2025 · Oct 31, 2025
diff --git a/include/osp/auxiliary/hash_util.hpp b/include/osp/auxiliary/hash_util.hpp
@@ -28,6 +28,28 @@ struct uniform_node_hash_func {
     result_type operator()(const VertexType& ) { return defautlVal; }
 };
 
+/**
+ * @brief Hash functor that returns a precomputed hash value for each vertex.
+ *
+ * The functor only keeps a reference to the table passed to the constructor; the table is
+ * not copied, so it must stay alive for as long as the functor is used.
+ *
+ * @tparam VertexType Vertex identifier type; it is used directly as an index into the table.
+ */
+template<typename VertexType>
+struct vector_node_hash_func {
+    using result_type = std::size_t;
+
+    const std::vector<std::size_t>& node_hashes_;
+
+    vector_node_hash_func(const std::vector<std::size_t>& node_hashes) : node_hashes_(node_hashes) {}
+
+    /// Returns node_hashes_[v]; no bounds check is performed.
+    result_type operator()(const VertexType& v) const {
+        return node_hashes_[v];
+    }
+};
+
 template<class T>
 void hash_combine(std::size_t &seed, const T &v) {
     std::hash<T> hasher;

diff --git a/include/osp/bsp/model/BspArchitecture.hpp b/include/osp/bsp/model/BspArchitecture.hpp
@@ -651,6 +651,21 @@ class BspArchitecture {
         }
     }
 
+    /**
+     * @brief Groups the processor indices by processor type.
+     *
+     * @return One list per processor type; list t holds, in increasing order, every
+     *         processor index p with processorType(p) == t.
+     */
+    std::vector<std::vector<unsigned>> getProcessorIdsByType() const {
+        std::vector<std::vector<unsigned>> ids_per_type(number_of_processor_types);
+        const auto num_procs = numberOfProcessors();
+        for (unsigned proc = 0; proc < num_procs; ++proc) {
+            ids_per_type[processorType(proc)].push_back(proc);
+        }
+        return ids_per_type;
+    }
+
     inline unsigned getNumberOfProcessorTypes() const { return number_of_processor_types; };
 
     inline MEMORY_CONSTRAINT_TYPE getMemoryConstraintType() const { return memory_const_type; }

diff --git a/include/osp/bsp/model/BspScheduleCostEvaluator.hpp b/include/osp/bsp/model/BspScheduleCostEvaluator.hpp
@@ -0,0 +1,212 @@
+/*
+Copyright 2024 Huawei Technologies Co., Ltd.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+
+@author Toni Boehnlein, Benjamin Lozes, Pal Andras Papp, Raphael S. Steiner
+*/
+
+#pragma once
+
+#include "BspSchedule.hpp"
+
+#include <algorithm>
+#include <cassert>
+#include <numeric>
+#include <type_traits>
+#include <vector>
+
+namespace osp {
+
+/**
+ * @class BspScheduleCostEvaluator
+ * @brief A class to compute various cost functions for a BspSchedule.
+ *
+ * This class wraps a BspSchedule by reference to avoid unnecessary copies
+ * while providing an interface to compute different cost models. Only references
+ * to the schedule and its instance are stored, so both must outlive the evaluator.
+ */
+template<typename Graph_t>
+class BspScheduleCostEvaluator {
+
+    static_assert(is_computational_dag_v<Graph_t>, "BspScheduleCostEvaluator can only be used with computational DAGs.");
+    static_assert(std::is_same_v<v_workw_t<Graph_t>, v_commw_t<Graph_t>>,
+                  "BspScheduleCostEvaluator requires work and comm. weights to have the same type.");
+
+  protected:
+    const BspSchedule<Graph_t>& schedule;
+    const BspInstance<Graph_t>& instance;
+
+    /**
+     * @brief Accumulates the send and receive volumes of the lazy sending model.
+     *
+     * For each node and each processor other than the node's own that runs at least one of
+     * its children, sendCosts(source, target) * vertex_comm_weight(node) is added once to
+     * send[source][s - 1] and rec[target][s - 1], where s is the earliest superstep of such
+     * a child on that processor.
+     *
+     * @param rec  Receive volumes indexed [processor][superstep]; updated in place.
+     * @param send Send volumes indexed [processor][superstep]; updated in place.
+     */
+    void compute_lazy_communication_costs_helper(std::vector<std::vector<v_commw_t<Graph_t>>> & rec, std::vector<std::vector<v_commw_t<Graph_t>>> & send) const {
+        const unsigned number_of_supersteps = schedule.numberOfSupersteps();
+        const auto number_of_processors = instance.numberOfProcessors();
+        const auto &dag = instance.getComputationalDag();
+
+        // Allocated once and reset per node. step_needed[p] is the earliest superstep in which a
+        // child of the current node runs on another processor p; number_of_supersteps = none.
+        std::vector<unsigned> step_needed(number_of_processors);
+
+        for (const auto &node : instance.vertices()) {
+            const auto source_proc = schedule.assignedProcessor(node);
+            std::fill(step_needed.begin(), step_needed.end(), number_of_supersteps);
+
+            for (const auto &target : dag.children(node)) {
+                const auto target_proc = schedule.assignedProcessor(target);
+                if (source_proc != target_proc) {
+                    step_needed[target_proc] =
+                        std::min(step_needed[target_proc], schedule.assignedSuperstep(target));
+                }
+            }
+
+            for (unsigned proc = 0; proc < number_of_processors; proc++) {
+                if (step_needed[proc] >= number_of_supersteps) {
+                    continue;
+                }
+
+                // Data is sent in the superstep right before it is first needed. A child on another
+                // processor in superstep 0 would make the unsigned index below wrap around.
+                assert(step_needed[proc] > 0 && "cross-processor child must not be in superstep 0");
+                const unsigned send_step = step_needed[proc] - 1;
+
+                const auto volume = instance.sendCosts(source_proc, proc) * dag.vertex_comm_weight(node);
+                send[source_proc][send_step] += volume;
+                rec[proc][send_step] += volume;
+            }
+        }
+    }
+
+    /**
+     * @brief Computes the communication cost of every superstep.
+     *
+     * The cost of a superstep is the larger of the maximum send volume and the maximum
+     * receive volume over all processors, multiplied by communicationCosts().
+     *
+     * @param rec  Receive volumes indexed [processor][superstep].
+     * @param send Send volumes indexed [processor][superstep].
+     * @return One communication cost per superstep.
+     */
+    std::vector<v_commw_t<Graph_t>> compute_max_comm_per_step_helper(const std::vector<std::vector<v_commw_t<Graph_t>>> & rec, const std::vector<std::vector<v_commw_t<Graph_t>>> & send) const {
+        const unsigned number_of_supersteps = schedule.numberOfSupersteps();
+        const auto number_of_processors = instance.numberOfProcessors();
+
+        std::vector<v_commw_t<Graph_t>> max_comm_per_step(number_of_supersteps, 0);
+        for (unsigned step = 0; step < number_of_supersteps; step++) {
+            v_commw_t<Graph_t> max_send = 0;
+            v_commw_t<Graph_t> max_rec = 0;
+            for (unsigned proc = 0; proc < number_of_processors; proc++) {
+                max_send = std::max(max_send, send[proc][step]);
+                max_rec = std::max(max_rec, rec[proc][step]);
+            }
+            max_comm_per_step[step] = std::max(max_send, max_rec) * instance.communicationCosts();
+        }
+        return max_comm_per_step;
+    }
+
+    /**
+     * @brief Computes the maximum work assigned to a single processor in every superstep.
+     *
+     * @return One entry per superstep holding the largest per-processor sum of vertex work weights.
+     */
+    std::vector<v_workw_t<Graph_t>> compute_max_work_per_step_helper() const {
+        const unsigned number_of_supersteps = schedule.numberOfSupersteps();
+
+        // work[step][proc]: total work weight of the nodes assigned to (step, proc).
+        std::vector<std::vector<v_workw_t<Graph_t>>> work(
+            number_of_supersteps, std::vector<v_workw_t<Graph_t>>(instance.numberOfProcessors(), 0));
+        for (const auto &node : instance.vertices()) {
+            work[schedule.assignedSuperstep(node)][schedule.assignedProcessor(node)] +=
+                instance.getComputationalDag().vertex_work_weight(node);
+        }
+
+        std::vector<v_workw_t<Graph_t>> max_work_per_step(number_of_supersteps, 0);
+        for (unsigned step = 0; step < number_of_supersteps; step++) {
+            for (const auto &proc_work : work[step]) {
+                max_work_per_step[step] = std::max(max_work_per_step[step], proc_work);
+            }
+        }
+
+        return max_work_per_step;
+    }
+
+  public:
+    /**
+     * @brief Construct a new Bsp Schedule Cost Evaluator object.
+     *
+     * @param sched The BspSchedule to evaluate; it is referenced, not copied.
+     */
+    BspScheduleCostEvaluator(const BspSchedule<Graph_t>& sched) : schedule(sched), instance(sched.getInstance()) {}
+
+    /**
+     * @brief Computes the communication costs using the lazy sending model.
+     *
+     * In the lazy sending model, data is sent in the superstep immediately
+     * preceding the superstep where it is first needed. The returned value also
+     * contains synchronisationCosts() once for every superstep of the schedule.
+     *
+     * @return The lazy communication costs, including the synchronisation costs.
+     */
+    v_commw_t<Graph_t> compute_lazy_communication_costs() const {
+        const unsigned number_of_supersteps = schedule.numberOfSupersteps();
+        const auto number_of_processors = instance.numberOfProcessors();
+
+        std::vector<std::vector<v_commw_t<Graph_t>>> rec(
+            number_of_processors, std::vector<v_commw_t<Graph_t>>(number_of_supersteps, 0));
+        std::vector<std::vector<v_commw_t<Graph_t>>> send(
+            number_of_processors, std::vector<v_commw_t<Graph_t>>(number_of_supersteps, 0));
+
+        compute_lazy_communication_costs_helper(rec, send);
+        const std::vector<v_commw_t<Graph_t>> max_comm_per_step = compute_max_comm_per_step_helper(rec, send);
+
+        v_commw_t<Graph_t> costs = 0;
+        for (const auto &step_comm_cost : max_comm_per_step) {
+            costs += step_comm_cost;
+            // The synchronisation cost is charged for every superstep, with or without communication.
+            costs += instance.synchronisationCosts();
+        }
+
+        return costs;
+    }
+
+    /**
+     * @brief Computes the total work costs of the schedule.
+     *
+     * The work cost is the sum of the maximum work done in each superstep
+     * across all processors.
+     *
+     * @return The total work costs.
+     */
+    v_workw_t<Graph_t> computeWorkCosts() const {
+        const std::vector<v_workw_t<Graph_t>> work_per_step = compute_max_work_per_step_helper();
+        return std::accumulate(work_per_step.begin(), work_per_step.end(), static_cast<v_workw_t<Graph_t>>(0));
+    }
+
+    /**
+     * @brief Computes the total costs of the schedule using the lazy communication model.
+     *
+     * @return The total costs.
+     */
+    v_workw_t<Graph_t> computeCosts() const { return compute_lazy_communication_costs() + computeWorkCosts(); }
+};
+
+} // namespace osp
diff --git a/include/osp/bsp/scheduler/GreedySchedulers/GreedyMetaScheduler.hpp b/include/osp/bsp/scheduler/GreedySchedulers/GreedyMetaScheduler.hpp
@@ -0,0 +1,123 @@
+/*
+Copyright 2024 Huawei Technologies Co., Ltd.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+
+@author Toni Boehnlein, Benjamin Lozes, Pal Andras Papp, Raphael S. Steiner
+*/
+
+#pragma once
+
+#include "osp/bsp/model/BspScheduleCostEvaluator.hpp"
+#include "osp/bsp/scheduler/Scheduler.hpp"
+#include "osp/bsp/scheduler/Serial.hpp"
+#include <iostream>
+#include <limits>
+#include <string>
+#include <vector>
+
+namespace osp {
+
+/**
+ * @class GreedyMetaScheduler
+ * @brief The GreedyMetaScheduler class represents a meta-scheduler that selects the best schedule produced from a list of
+ * added schedulers.
+ *
+ * This class inherits from the Scheduler class and implements the computeSchedule() and getScheduleName() methods.
+ * The computeSchedule() method iterates through a list of schedulers, computes a schedule using each one,
+ * and returns the schedule with the minimum cost as reported by BspScheduleCostEvaluator::computeCosts().
+ *
+ * Registered schedulers are stored as non-owning pointers and must outlive this object.
+ * NOTE(review): addSerialScheduler() registers a pointer to a member of this object, so a copy of a
+ * GreedyMetaScheduler would keep pointing at the serial scheduler of the object it was copied from.
+ */
+template<typename Graph_t>
+class GreedyMetaScheduler : public Scheduler<Graph_t> {
+
+    Serial<Graph_t> serial_scheduler_;
+    std::vector<Scheduler<Graph_t>*> schedulers_;
+
+    static constexpr bool verbose = false;
+
+  public:
+    /**
+     * @brief Default constructor for GreedyMetaScheduler.
+     */
+    GreedyMetaScheduler() : Scheduler<Graph_t>() {}
+
+    /**
+     * @brief Default destructor for GreedyMetaScheduler.
+     */
+    ~GreedyMetaScheduler() override = default;
+
+    /// Appends the built-in serial scheduler to the list of candidate schedulers.
+    void addSerialScheduler() { schedulers_.push_back(&serial_scheduler_); }
+
+    /// Appends @p s to the list of candidate schedulers; only its address is stored.
+    void addScheduler(Scheduler<Graph_t> & s) { schedulers_.push_back(&s); }
+
+    /// Removes all candidate schedulers.
+    void resetScheduler() { schedulers_.clear(); }
+
+    /**
+     * @brief Runs every candidate scheduler and keeps the cheapest schedule.
+     *
+     * If the architecture has exactly one processor, or if no candidate scheduler has been added,
+     * the built-in serial scheduler is used and its status is returned.
+     *
+     * @param schedule Receives the selected schedule.
+     * @return The status of the serial scheduler in the two cases above, RETURN_STATUS::OSP_SUCCESS otherwise.
+     */
+    RETURN_STATUS computeSchedule(BspSchedule<Graph_t> &schedule) override {
+        if (schedule.getInstance().getArchitecture().numberOfProcessors() == 1) {
+            if constexpr (verbose) std::cout << "Using serial scheduler for P=1." << std::endl;
+            return serial_scheduler_.computeSchedule(schedule);
+        }
+
+        if (schedulers_.empty()) {
+            // Without candidates there is nothing to compare; fall back to the serial scheduler
+            // instead of reporting success for a schedule that was never computed.
+            if constexpr (verbose) std::cout << "No scheduler added, using serial scheduler." << std::endl;
+            return serial_scheduler_.computeSchedule(schedule);
+        }
+
+        v_workw_t<Graph_t> best_schedule_cost = std::numeric_limits<v_workw_t<Graph_t>>::max();
+        bool found_schedule = false;
+        BspSchedule<Graph_t> current_schedule(schedule.getInstance());
+
+        for (Scheduler<Graph_t>* scheduler : schedulers_) {
+            // NOTE(review): the status returned by the candidate is not inspected, so the schedule of a
+            // candidate that reports a failure is still evaluated -- confirm this is intended.
+            scheduler->computeSchedule(current_schedule);
+            BspScheduleCostEvaluator<Graph_t> evaluator(current_schedule);
+            const v_workw_t<Graph_t> schedule_cost = evaluator.computeCosts();
+
+            if constexpr (verbose) std::cout << "Executed scheduler " << scheduler->getScheduleName() << ", costs: " << schedule_cost << ", nr. supersteps: " << current_schedule.numberOfSupersteps() << std::endl;
+
+            // The first candidate is always accepted so that `schedule` is set even when its cost
+            // equals the initial sentinel value.
+            if (!found_schedule || schedule_cost < best_schedule_cost) {
+                found_schedule = true;
+                best_schedule_cost = schedule_cost;
+                schedule = current_schedule;
+                if constexpr (verbose) std::cout << "New best schedule!" << std::endl;
+            }
+        }
+
+        return RETURN_STATUS::OSP_SUCCESS;
+    }
+
+    std::string getScheduleName() const override { return "GreedyMetaScheduler"; }
+};
+
+} // namespace osp
diff --git a/include/osp/bsp/scheduler/LocalSearch/KernighanLin_v2/kl_improver.hpp b/include/osp/bsp/scheduler/LocalSearch/KernighanLin_v2/kl_improver.hpp
@@ -1067,9 +1067,15 @@ class kl_improver : public ImprovementScheduler<Graph_t> {
         return improveSchedule(schedule);
     }
 
-    void set_compute_with_time_limit(bool compute_with_time_limit_) {
-        compute_with_time_limit = compute_with_time_limit_;
-    }
+    /// Stores @p time_quality in parameters.time_quality.
+    virtual void setTimeQualityParameter(const double time_quality) {
+        this->parameters.time_quality = time_quality;
+    }
+
+    /// Stores @p superstep_remove_strength in parameters.superstep_remove_strength.
+    virtual void setSuperstepRemoveStrengthParameter(const double superstep_remove_strength) {
+        this->parameters.superstep_remove_strength = superstep_remove_strength;
+    }
 
     virtual std::string getScheduleName() const {
         return "kl_improver_" + comm_cost_f.name();

diff --git a/include/osp/bsp/scheduler/LocalSearch/KernighanLin_v2/kl_improver_mt.hpp b/include/osp/bsp/scheduler/LocalSearch/KernighanLin_v2/kl_improver_mt.hpp
@@ -99,7 +99,14 @@ class kl_improver_mt : public kl_improver<Graph_t, comm_cost_function_t, MemoryC
 
   public:
 
-  kl_improver_mt() : kl_improver<Graph_t, comm_cost_function_t, MemoryConstraint_t, window_size, cost_t>() { }
+    /// Default constructor; forwards to the default constructor of the kl_improver base.
+    kl_improver_mt()
+        : kl_improver<Graph_t, comm_cost_function_t, MemoryConstraint_t, window_size, cost_t>() {}
+
+    /// Passes @p seed on to the kl_improver base-class constructor.
+    explicit kl_improver_mt(unsigned seed)
+        : kl_improver<Graph_t, comm_cost_function_t, MemoryConstraint_t, window_size, cost_t>(seed) {}
+
     virtual ~kl_improver_mt() = default;
 
     void set_max_num_threads(const unsigned num_threads) {