Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
219 changes: 219 additions & 0 deletions pabutools/rules/gpseq.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,219 @@
"""
Implementation of the GPseq algorithm from:

"Proportionally Representative Participatory Budgeting: Axioms and Algorithms"
by Haris Aziz, Bettina Klaus, Jérôme Lang, and Markus Brill (2017)
https://arxiv.org/abs/1711.08226

Programmer: <Shlomi Asraf>
Date: 2025-05-13
"""

from __future__ import annotations
import logging
import numpy as np
from pabutools.fractions import frac
from pabutools.election.instance import Instance, Project
from pabutools.election.profile import AbstractApprovalProfile
from pabutools.rules.budgetallocation import BudgetAllocation
from pabutools.tiebreaking import TieBreakingRule, lexico_tie_breaking

logging.basicConfig(level=logging.INFO, format="%(levelname)s: %(message)s")


def gpseq(
instance: Instance,
profile: AbstractApprovalProfile,
tie_breaking: TieBreakingRule = lexico_tie_breaking
) -> BudgetAllocation:
"""
Algorithm 6: GPseq - Greedy Phragmen Sequence algorithm.

Selects a subset of projects to minimize the maximal load on any voter.

Parameters
----------
instance : Instance
The instance including all projects and the total budget.
profile : ApprovalProfile
The approval profile of the voters.
tie_breaking : TieBreakingRule, optional
The rule to apply when multiple projects have equal impact.

Returns
-------
List[Project]
A list of selected projects.

Examples
--------
Example 1:
Algorithm 6: GPseq - Greedy Phragmen Sequence algorithm.

Selects a subset of projects to minimize the maximal load on any voter.

Parameters
----------
instance : Instance
The instance including all projects and the total budget.
profile : ApprovalProfile
The approval profile of the voters.
tie_breaking : TieBreakingRule, optional
The rule to apply when multiple projects have equal impact.

Returns
-------
List[Project]
A list of selected projects.

Examples
--------
>>> p1 = Project("c1", cost=2)
>>> p2 = Project("c2", cost=2)
>>> p3 = Project("c3", cost=1)
>>> instance = Instance([p1, p2, p3], budget_limit=3)
>>> profile = ApprovalProfile([ApprovalBallot([p1]), ApprovalBallot([p1]), ApprovalBallot([p2]), ApprovalBallot([p2])])
>>> result = gpseq(instance, profile)
>>> [p.name for p in result]
['c1', 'c3']

>>> p1 = Project("c1", cost=1)
>>> instance = Instance([p1], budget_limit=1)
>>> profile = ApprovalProfile([ApprovalBallot([p1])])
>>> result = gpseq(instance, profile)
>>> [p.name for p in result]
['c1']

>>> p1 = Project("c1", cost=1)
>>> p2 = Project("c2", cost=2)
>>> instance = Instance([p1, p2], budget_limit=2)
>>> profile = ApprovalProfile([ApprovalBallot([p1]), ApprovalBallot([p1]), ApprovalBallot([p1]), ApprovalBallot([p2])])
>>> result = gpseq(instance, profile)
>>> [p.name for p in result]
['c1']

>>> p1 = Project("c1", cost=2)
>>> p2 = Project("c2", cost=1.5)
>>> p3 = Project("c3", cost=1.5)
>>> instance = Instance([p1, p2, p3], budget_limit=3)
>>> profile = ApprovalProfile([ApprovalBallot([p1, p2]), ApprovalBallot([p1, p2]), ApprovalBallot([p1, p2]), ApprovalBallot([p1, p2]), ApprovalBallot([p3]), ApprovalBallot([p3])])
>>> result = gpseq(instance, profile)
>>> [p.name for p in result]
['c2', 'c3']

>>> p1 = Project("c1", cost=2)
>>> p2 = Project("c2", cost=2)
>>> p3 = Project("c3", cost=0.8)
>>> instance = Instance([p1, p2, p3], budget_limit=2)
>>> profile = ApprovalProfile([ApprovalBallot([p1, p2]), ApprovalBallot([p1, p2]), ApprovalBallot([p1, p2]), ApprovalBallot([p1, p2]), ApprovalBallot([p3]), ApprovalBallot([p3])])
>>> result = gpseq(instance, profile)
>>> [p.name for p in result]
['c3']

>>> p1 = Project("c1", cost=1.5)
>>> p2 = Project("c2", cost=1.5)
>>> p3 = Project("c3", cost=1.0)
>>> instance = Instance([p1, p2, p3], budget_limit=3)
>>> profile = ApprovalProfile([ApprovalBallot([p1, p2]), ApprovalBallot([p1]), ApprovalBallot([p2, p3]), ApprovalBallot([p3])])
>>> result = gpseq(instance, profile)
>>> [p.name for p in result]
['c3', 'c1']
"""
logging.info("Starting GPseq algorithm")

if hasattr(profile, "is_multiprofile") and profile.is_multiprofile():
raise ValueError("GPseq currently does not support MultiProfile")

logging.info("Starting GPseq algorithm")
for project in instance:
if project.cost < 0:
raise ValueError(f"Project {project.name} has negative cost: {project.cost}")

logging.info(f"Initial budget: {instance.budget_limit}")
logging.info(f"Projects: {[f'{p.name} (cost={p.cost})' for p in instance]}")
logging.info(f"Profile: {[[p.name for p in ballot] for ballot in profile]}")

budget = instance.budget_limit
remaining_budget = budget
selected_projects: list[Project] = []
current_loads = np.zeros(profile.num_ballots())
available_projects = set(instance)

while True:
approvers_map = {
p: [i for i, ballot in enumerate(profile) if p in ballot]
for p in available_projects
if p.cost <= remaining_budget and any(p in ballot for ballot in profile)
}

logging.debug(f"Feasible projects this round: {[p.name for p in approvers_map]}")
if not approvers_map:
logging.info("No more feasible approved projects. Exiting main loop.")
break

project_to_load = {
p: compute_load(p, approvers_map[p], current_loads)
for p in approvers_map
}

min_load = min(project_to_load.values())
candidates = [p for p in project_to_load if project_to_load[p] == min_load]
logging.debug(f"Minimum load: {min_load}, Candidates: {[p.name for p in candidates]}")

chosen = tie_breaking.untie(instance, profile, candidates)
logging.info(f"Chosen project: {chosen.name} with cost {chosen.cost} and {min_load} max load")

selected_projects.append(chosen)
remaining_budget -= chosen.cost
approvers = approvers_map[chosen]
cost_per_voter = frac(chosen.cost, len(approvers))
for voter in approvers:
current_loads[voter] += float(cost_per_voter)

logging.debug(f"Updated voter loads: {current_loads}")
logging.debug(f"Remaining budget: {remaining_budget}")
available_projects.remove(chosen)

logging.info("Starting post-processing step")
for p in sorted(available_projects, key=lambda x: x.name):
if p.cost <= remaining_budget:
selected_projects.append(p)
remaining_budget -= p.cost
logging.info(f"Post-processed addition: {p.name}, remaining budget: {remaining_budget}")

logging.info(f"Final selected projects: {[p.name for p in selected_projects]}")
return BudgetAllocation(selected_projects)

def compute_load(project: Project, approvers: list[int], current_loads: np.ndarray) -> float:
"""
Computes the new maximal load if we add the given project,
distributing its cost evenly among its supporters.

Parameters
----------
project : Project
The project being considered.
approvers : List[int]
The list of voter indices who approve this project.
current_loads : np.ndarray
The current load vector of all voters.

Returns
-------
float
The maximal load after distributing the project cost among its approvers.

Examples
--------
>>> project = Project("c1", cost=2)
>>> compute_load(project, [0, 1], np.array([0.0, 0.0]))
1.0
"""
if not approvers:
return float('inf')
cost_per_voter = float(frac(project.cost, len(approvers)))
new_loads = current_loads.copy()
for voter in approvers:
new_loads[voter] += cost_per_voter
logging.info(f"project: {project.name} with cost {project.cost} and {float(max(new_loads))} max load")
return float(max(new_loads))
77 changes: 77 additions & 0 deletions tests/test_gpseq.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,77 @@
import unittest
from pabutools.election.instance import Project, Instance
from pabutools.election.profile import ApprovalProfile
from pabutools.election.ballot.approvalballot import ApprovalBallot
from pabutools.rules.gpseq import gpseq


class TestGPseq(unittest.TestCase):

def test_empty_input(self):
instance = Instance([], 10)
profile = ApprovalProfile([])
result = gpseq(instance, profile)
self.assertEqual(result, [])

def test_invalid_input(self):
p1 = Project("c1", cost=-5)
instance = Instance([p1], 1)
profile = ApprovalProfile([ApprovalBallot([p1])])
with self.assertRaises(Exception):
gpseq(instance, profile)

def test_small_input(self):
p1 = Project("c1", cost=1)
instance = Instance([p1], 1)
profile = ApprovalProfile([ApprovalBallot([p1])])
result = gpseq(instance, profile)
self.assertEqual([p.name for p in result], ["c1"])

def test_medium_input(self):
p1 = Project("c1", cost=1.5)
p2 = Project("c2", cost=1.5)
p3 = Project("c3", cost=1.0)
instance = Instance([p1, p2, p3], 3)
profile = ApprovalProfile([
ApprovalBallot([p1, p2]),
ApprovalBallot([p1]),
ApprovalBallot([p2, p3]),
ApprovalBallot([p2]),
])
result = gpseq(instance, profile)
names = set(p.name for p in result)
self.assertTrue(names == {"c2", "c1"})

def test_large_input_random_structure_with_fairness(self):
import random
num_projects = 30
projects = [Project(f"c{i + 1}", cost=random.randint(1, 5)) for i in range(num_projects)]
instance = Instance(projects, 50)

approvals = [
ApprovalBallot(random.sample(projects, random.randint(3, 5)))
for _ in range(100)
]
profile = ApprovalProfile(approvals)

result = gpseq(instance, profile)

project_to_index = {p: i for i, p in enumerate(projects)}
approval_counts = [0] * num_projects
for ballot in profile:
for project in ballot:
i = project_to_index[project]
approval_counts[i] += 1

ratios = [(approval_counts[i] / projects[i].cost, projects[i]) for i in range(num_projects)]
ratios.sort(reverse=True)
best_ratio_projects = [p for _, p in ratios[:len(result)]]

selected_names = set(p.name for p in result)
best_names = set(p.name for p in best_ratio_projects)
overlap = selected_names & best_names
self.assertGreaterEqual(len(overlap), len(result) // 2)


if __name__ == "__main__":
unittest.main()