-
Notifications
You must be signed in to change notification settings - Fork 2
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
0 parents
commit 64daf24
Showing
6 changed files
with
25,756 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,2 @@ | ||
# Auto detect text files and perform LF normalization | ||
* text=auto |
Large diffs are not rendered by default.
Oops, something went wrong.
Binary file not shown.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,375 @@ | ||
import pandas as pd | ||
from itertools import combinations | ||
from math import log | ||
from math import sqrt | ||
from math import floor | ||
import numpy as np | ||
import matplotlib.pyplot as plt | ||
from functools import reduce | ||
import time | ||
import json | ||
import os | ||
|
||
def add(x, y):
    """Return ``x + y``; a named, two-argument form of the ``+`` operator."""
    total = x + y
    return total
|
||
# | ||
# | ||
# definitions for a *bunch* of variations of QF | ||
# | ||
# in all of these functions, | ||
# - donation_df is expected to be a pandas dataframe where rows are wallets, columns are projects, and entries represent a wallet's total donation amount to a project | ||
# - cluster_df is expected to be a pandas dataframe where rows are wallets, columns are clusters, and entries denote the strength of a user's membership in that cluster. | ||
# | ||
# also important to note: these functions all return the matching amounts each project should get under that variant of QF -- to get the full funding amount, | ||
# you need to add in the direct donations as well! | ||
# | ||
|
||
|
||
# first, some helper functions | ||
def binarize(df):
    """Return a 0/1 copy of ``df``: 1 where an entry is > 0, otherwise 0.

    The comparison is done on the whole frame at once instead of calling a
    Python lambda per cell through ``DataFrame.applymap`` (deprecated in
    recent pandas releases). NaN entries compare as "not > 0" and therefore
    map to 0, exactly as the per-cell version did. The input frame is left
    untouched; index and columns are preserved.
    """
    return (df > 0).astype(int)
|
||
def align(donation_df, cluster_df):
    """Restrict both frames to the same wallets, sorted identically.

    NOTE: both arguments are modified IN PLACE (rows are dropped and the
    index is sorted); the same two objects are returned for convenience.

    Steps:
      1. drop wallets whose row is entirely zero (no donations / no clusters),
      2. drop wallets that appear in only one of the two frames,
      3. sort both indices so later matrix products line up row-for-row.

    Returns:
        (donation_df, cluster_df) -- the mutated input objects.
    """
    # All-zero rows are detected with a vectorized comparison rather than a
    # Python-level ``apply`` over every row.
    empty_clusters = cluster_df.index[(cluster_df == 0).all(axis=1).to_numpy()]
    cluster_df.drop(empty_clusters, inplace=True)
    empty_donations = donation_df.index[(donation_df == 0).all(axis=1).to_numpy()]
    donation_df.drop(empty_donations, inplace=True)

    # Remove wallets present in just one frame. Order matters: the second
    # difference is taken against the already-reduced cluster index.
    cluster_df.drop(cluster_df.index.difference(donation_df.index), inplace=True)
    donation_df.drop(donation_df.index.difference(cluster_df.index), inplace=True)

    # Same ordering in both frames (required by the matrix products later).
    cluster_df.sort_index(inplace=True)
    donation_df.sort_index(inplace=True)

    return donation_df, cluster_df
|
||
def check_matching_cap(col, matching_cap_percent):
    """Clamp matching shares at a cap and hand the excess to uncapped projects.

    Works on a copy of ``col`` (a pandas Series of matching shares) and
    repeats until redistribution no longer pushes any project over
    ``matching_cap_percent``, or until nothing is left below the cap to
    receive the excess.

    Returns:
        A new Series with the capped / redistributed shares.
    """
    shares = col.copy()
    cap = matching_cap_percent
    while True:
        # How far above the cap each project currently sits (0 if not above).
        excess = np.maximum(0, shares - cap)
        # Pin every over-cap project at the cap.
        shares.loc[shares > cap] = cap
        # Projects strictly below the cap are the ones that can absorb excess.
        below_cap = shares < cap
        uncapped_total = shares[below_cap].sum()
        if not uncapped_total > 0:
            # Nobody is left to receive the excess.
            break
        # Grow each uncapped project proportionally to its current share.
        growth = 1 + excess.sum() / uncapped_total
        shares.loc[below_cap] *= growth
        # Stop once the redistribution did not create new over-cap projects.
        if not np.maximum(0, shares - cap).sum() > 0:
            break
    return shares
|
||
def scale_matching(funding, matching_cap_percent, matching_amount):
    """Turn raw per-project funding into capped matching payouts.

    Args:
        funding: dict mapping project name -> raw matching amount.
        matching_cap_percent: cap passed straight to ``check_matching_cap``;
            it is compared against the normalized shares (fractions of the
            total), so it is expressed on that same 0-1 scale here.
        matching_amount: total matching pool to distribute.

    Returns:
        DataFrame with columns ``project_name`` and ``matching_amount``.
    """
    total_money = sum(funding.values())
    if total_money:
        funding_normalized = {p: amount / total_money for p, amount in funding.items()}
    else:
        # No matching was generated at all: every project gets a zero share
        # instead of a ZeroDivisionError / NaN from dividing by zero.
        funding_normalized = {p: 0.0 for p in funding}
    # Create DataFrame with 'project_name' and 'matching_amount' columns
    result = pd.DataFrame(list(funding_normalized.items()), columns=['project_name', 'matching_amount'])
    # Apply the cap to the 'matching_amount' column
    result['matching_amount'] = check_matching_cap(result['matching_amount'], matching_cap_percent)
    # Scale the 'matching_amount' column by the total matching amount
    result['matching_amount'] = result['matching_amount'] * matching_amount
    return result
|
||
# now on to the QF variants | ||
|
||
def standard_qf(donation_df):
    """Standard quadratic-funding match per project.

    For each project (column) the match is
    ``(sum of sqrt(donation))**2 - sum(donation)``, i.e. the QF total minus
    the direct donations.

    Returns:
        dict mapping project -> matching amount.
    """
    funding = {}
    for project in donation_df.columns:
        contributions = donation_df[project]
        sum_of_roots = contributions.apply(sqrt).sum()
        funding[project] = sum_of_roots ** 2 - contributions.sum()
    return funding
|
||
def pairwise(donation_df, M=0.01):
    """Pairwise-bounded quadratic funding match per project.

    For every unordered pair of distinct donors (i, j) and every project p,
    the pair contributes ``sqrt(c_ip) * sqrt(c_jp) * k_ij`` where
    ``k_ij = M / (M + <sqrt(c_i), sqrt(c_j)>)`` and the inner product runs
    over all projects. Only the matching amount is returned (direct
    donations are not included).

    The computation is done with matrix products instead of a Python loop
    over all donor pairs with scalar ``.loc`` lookups. Donations are assumed
    to be non-negative.

    Args:
        donation_df: wallets x projects frame of donation amounts.
        M: pairwise bounding constant.

    Returns:
        dict mapping project -> matching amount.
    """
    sqrt_donations = np.sqrt(donation_df.to_numpy(dtype=float))

    # overlap[i, j] is the dot product of i's and j's square-rooted donations.
    overlap = sqrt_donations @ sqrt_donations.T

    # k[i, j] = M / (M + overlap[i, j]). Pairs with zero overlap share no
    # project, so they contribute nothing to any project; leaving their
    # coefficient at 0 also avoids a 0/0 when M == 0.
    k = np.divide(M, M + overlap, out=np.zeros_like(overlap), where=overlap > 0)
    # A donor is never paired with themselves.
    np.fill_diagonal(k, 0)

    funding = {}
    for col, p in enumerate(donation_df.columns):
        s = sqrt_donations[:, col]
        # s @ k @ s counts every pair twice ((i, j) and (j, i)); halve it.
        funding[p] = (s @ k @ s) / 2

    return funding
|
||
def cluster_profile_pairwise(donation_df, cluster_df):
    """Pairwise matching where the pair coefficient comes from cluster overlap.

    The coefficient for wallets i and j is

        (# clusters exactly one of i, j is in) / (# clusters i is in + # clusters j is in)

    Cluster memberships are binarized first, then both frames are passed
    through ``align`` (which modifies ``donation_df`` in place).

    Returns:
        dict mapping project -> matching amount (direct donations excluded).
    """
    membership = binarize(cluster_df)
    donation_df, membership = align(donation_df, membership)

    wallets = donation_df.index
    n_clusters = len(membership.columns)

    # Numerator: start from the total number of clusters, remove the clusters
    # both wallets are in, then remove the clusters neither wallet is in.
    # What remains is the number of clusters exactly one of them is in.
    in_both = membership.dot(membership.transpose())
    absent = 1 - membership
    in_neither = absent.dot(absent.transpose())
    numerator_matrix = pd.DataFrame(index=wallets, columns=wallets, data=n_clusters) - in_both - in_neither

    # Denominator: entry (i, j) is (# clusters i is in) + (# clusters j is in).
    groups_per_wallet = membership.apply(sum, axis=1)
    tiled = pd.DataFrame(index=wallets, columns=wallets, data=[groups_per_wallet] * len(wallets))
    denominator_matrix = tiled + tiled.transpose()

    coeffs = numerator_matrix / denominator_matrix

    funding = {p: 0 for p in donation_df.columns}
    for p in donation_df.columns:
        # Only wallets that actually gave to this project take part.
        non_donors = donation_df.loc[donation_df[p] == 0].index
        project_coeffs = coeffs.drop(index=non_donors, columns=non_donors)

        roots = donation_df[p].drop(non_donors).apply(sqrt).to_frame()
        # Outer product: entry (i, j) is sqrt(c_i) * sqrt(c_j).
        root_products = roots.dot(roots.transpose())
        funding[p] += (root_products * project_coeffs).sum().sum()

    return funding
|
||
def clustermatch(donation_df, cluster_df):
    """Cluster-match QF: clusters, not individual wallets, go under the square root.

    Each wallet's donation is split across its clusters in proportion to the
    (row-normalized) membership weights; the per-cluster totals are then fed
    through the standard QF formula for each project.

    Neither input frame is modified: filtering and sorting happen on copies,
    so the caller's ``donation_df`` keeps its rows and ordering.

    Args:
        donation_df: wallets x projects frame of donation amounts.
        cluster_df: wallets x clusters frame of membership weights.

    Returns:
        dict mapping project -> matching amount (direct donations excluded).
    """
    projects = donation_df.columns

    # Normalize rows to sum to 1. Rows with no membership at all become zeros;
    # doing this on the whole frame keeps the result a DataFrame even when
    # the first wallet happens to be in no cluster.
    has_membership = (cluster_df != 0).any(axis=1)
    normalized_clusters = cluster_df.div(cluster_df.sum(axis=1), axis=0)
    normalized_clusters.loc[~has_membership] = 0

    # Keep only wallets present in both frames, in the same (sorted) order so
    # the matrix product below lines up row-for-row.
    donations = donation_df.loc[donation_df.index.isin(cluster_df.index)].sort_index()
    memberships = normalized_clusters.loc[normalized_clusters.index.isin(donation_df.index)].sort_index()

    # B: rows are projects, columns are clusters, and entry (i, j) is
    # cluster j's donation to project i.
    B = donations.transpose().dot(memberships)

    funding = {p: B.loc[p].apply(sqrt).sum() ** 2 - B.loc[p].sum() for p in projects}
    return funding
|
||
def donation_profile_clustermatch(donation_df):
    """Run cluster match using donation profiles as the clusters.

    Everyone who donated to exactly the same set of projects lands in the
    same cluster (and so under the same square root).

    A profile is stored as a binary string over the project columns, in
    column order: with four projects, a wallet that gave to projects 0, 1
    and 3 belongs to cluster "1101".

    Args:
        donation_df: frame where rows are unique donors, columns are
            projects, and entry (i, j) is donor i's total donation to
            project j.

    Returns:
        Whatever ``clustermatch`` returns for these profile clusters.
    """
    projects = donation_df.columns

    def profile_of(row):
        # '1' for every project this wallet gave a positive amount to.
        return ''.join(['1' if row[p] > 0 else '0' for p in projects])

    don_profiles = donation_df.apply(profile_of, axis=1)

    # One column per distinct profile; each wallet gets a 1 in its own profile.
    membership = pd.DataFrame(0, index=donation_df.index, columns=don_profiles.unique())
    for wallet in donation_df.index:
        membership.loc[wallet, don_profiles[wallet]] = 1

    return clustermatch(donation_df, membership)
|
||
def COCM(donation_df, cluster_df, fancy=True):
    """Run CO-CM on a set of donations and cluster memberships.

    Both frames are first passed through ``align`` (which drops empty and
    unmatched wallets and sorts the indices, modifying the inputs in place).

    Args:
        donation_df: wallets x projects frame of donation amounts.
        cluster_df: wallets x clusters frame of membership weights.
        fancy: if False, follow the formula in the whitepaper exactly
            (boolean indicators). If True, use fractional indicators: for a
            wallet outside a cluster, the fraction of its "friends" (wallets
            it shares at least one cluster with) that are in that cluster.

    Returns:
        dict mapping project -> matching amount (direct donations excluded).
    """
    donation_df, cluster_df = align(donation_df, cluster_df)

    projects = donation_df.columns
    clusters = cluster_df.columns
    donors = donation_df.index

    # Normalize the cluster frame so rows sum to 1: an entry is then the
    # "weight" a cluster has for a user. A wallet in zero clusters should not
    # survive align(), but just in case such a row is filled with 0s.
    normalized_clusters = cluster_df.apply(lambda row: row / row.sum() if any(row) else 0, axis=1)

    binarized_clusters = binarize(cluster_df)

    if fancy:
        # friendship_matrix: wallets x wallets, True at (i, j) when wallets i
        # and j are in at least one cluster together.
        friendship_matrix = cluster_df.dot(cluster_df.transpose()).apply(lambda col: col > 0)

        # k_indicators: wallets x clusters.
        #  - wallet i not in cluster g: fraction of i's friends who are in g
        #  - wallet i in cluster g: 1
        # (in the past, cluster_df was used here instead of binarized_clusters)
        k_indicators = friendship_matrix.dot(binarized_clusters).apply(
            lambda row: row / friendship_matrix.loc[row.name].sum(), axis=1)
        k_indicators = k_indicators.apply(
            lambda row: np.maximum(row, binarized_clusters.loc[row.name]), axis=1)
    else:
        # friendship_matrix: wallets x wallets, > 0 at (i, j) when wallets i
        # and j are in at least one group together.
        friendship_matrix = cluster_df.dot(cluster_df.transpose())

        # k_indicators: True at (i, g) if wallet i is in a shared group with
        # anyone from g, False otherwise.
        k_indicators = friendship_matrix.dot(cluster_df).apply(lambda col: col > 0)

    # Loop-invariant helpers: a 1 x clusters row of ones used to broadcast a
    # donation column across clusters, and a mask selecting the diagonal of
    # the clusters x clusters subsidy matrix.
    ones_row = pd.DataFrame(index=['_'], columns=clusters, data=1)
    diagonal = np.eye(len(clusters), dtype=bool)

    # Only the matching amounts are accumulated here (not direct donations).
    funding = {p: 0 for p in projects}

    for p in projects:
        # C: wallets x clusters, row i holds wallet i's contribution to p in
        # every entry (column vector of donations times a row of ones).
        C = pd.DataFrame(index=donors, columns=['_'], data=donation_df[p].values).dot(ones_row)

        # K: entry (i, g) ranges between c_i and sqrt(c_i) depending on i's
        # relationship with cluster g (and on "fancy").
        K = (k_indicators * C.pow(1/2)) + ((1 - k_indicators) * C)

        # P_prime entry (g, h) is  sum_{i in g} K(i, h) / T_i,
        # where T_i is the total number of groups that i is in.
        P_prime = K.transpose().dot(normalized_clusters)

        # P: off-diagonal entry (g, h) is the pairwise subsidy for the pair
        # of groups g and h.
        P = (P_prime * P_prime.transpose()).pow(1/2)

        # Diagonal entries are not relevant; only distinct pairs count.
        # mask() builds a new frame rather than writing through ``P.values``,
        # which is not guaranteed to be a writable view of the frame (it is
        # read-only when pandas Copy-on-Write is active).
        P = P.mask(diagonal, 0)

        # The sum of every entry in P is the subsidy COCM awards the project.
        funding[p] += P.sum().sum()

    return funding
|
||
def standard_donation(donation_df):
    """Plain (non-quadratic) tally: each project's total is the sum of its donations.

    Returns:
        dict mapping project -> sum of that project's column.
    """
    funding = {}
    for project in donation_df.columns:
        funding[project] = donation_df[project].sum()
    return funding
|
||
def apply_sliding_scale(votes_data, *, score_range=(15, 25), scale_range=(0.5, 1.0)):
    """Scale each vote's ``amountUSD`` by a factor derived from its ``score``.

    The factor is linearly interpolated from ``scale_range`` over
    ``score_range``; scores at or above the upper bound get the upper scale
    factor, and scores below the lower bound are zeroed out entirely.
    With the defaults: score < 15 -> 0, 15 -> 0.5x, 25 and above -> 1x.

    ``votes_data`` is modified in place and returned. The unscaled amount is
    kept in ``starting_amountUSD`` so that calling this function again always
    rescales from the original amounts rather than compounding.

    Args:
        votes_data: frame with ``amountUSD`` and ``score`` columns.
        score_range: (low, high) score bounds of the sliding scale.
        scale_range: scale factors applied at the low and high score bounds.

    Returns:
        The same ``votes_data`` frame with ``amountUSD`` rescaled.
    """
    low_score = score_range[0]

    # Remember (or restore) the unscaled amounts.
    if 'starting_amountUSD' not in votes_data.columns:
        votes_data['starting_amountUSD'] = votes_data['amountUSD']
    else:
        votes_data['amountUSD'] = votes_data['starting_amountUSD']

    # np.interp clamps outside the range, so scores at or above the upper
    # bound already receive the upper scale factor here.
    factors = np.interp(votes_data['score'], score_range, scale_range)
    votes_data['amountUSD'] = votes_data['amountUSD'] * factors

    # Below the lower bound the vote does not count at all.
    votes_data.loc[votes_data['score'] < low_score, 'amountUSD'] = 0

    return votes_data
|
||
def flag_base_votes(votes, min_donation_threshold, score_threshold):
    """Add 0/1 flag columns to ``votes`` (in place) and return it.

    Columns added, in this order:
      - ``self_vote``:  voter equals the payout address
      - ``low_score``:  score is below ``score_threshold``
      - ``low_amount``: amountUSD is below ``min_donation_threshold``
      - ``base_vote``:  1 only when none of the three flags above is set
    """
    is_self_vote = votes['voter'] == votes['payoutAddress']
    is_low_score = votes['score'] < score_threshold
    is_low_amount = votes['amountUSD'] < min_donation_threshold

    votes['self_vote'] = is_self_vote.astype(int)
    votes['low_score'] = is_low_score.astype(int)
    votes['low_amount'] = is_low_amount.astype(int)

    # A base vote is one that trips none of the flags.
    disqualified = is_low_amount | is_low_score | is_self_vote
    votes['base_vote'] = (~disqualified).astype(int)

    return votes
|
||
def prep_donations_data(votes_data, min_donation_threshold, score_threshold):
    """Flag the votes, then apply the score-based sliding scale to their amounts.

    Simply chains ``flag_base_votes`` and ``apply_sliding_scale`` and returns
    the frame produced by the latter.
    """
    flagged = flag_base_votes(votes_data, min_donation_threshold, score_threshold)
    return apply_sliding_scale(flagged)
|
||
def pivot_votes(round_votes):
    """Pivot a long table of votes into a voters x projects donation matrix.

    Args:
        round_votes: frame with ``voter``, ``project_name`` and ``amountUSD``
            columns, one row per vote.

    Returns:
        DataFrame indexed by voter with one column per project. Each entry is
        the voter's TOTAL ``amountUSD`` to that project (0 where they did not
        donate).
    """
    # aggfunc must be 'sum': pivot_table defaults to the mean, which would
    # average (not total) several votes from one wallet to the same project.
    donation_matrix = round_votes.pivot_table(
        index='voter',
        columns='project_name',
        values='amountUSD',
        aggfunc='sum',
        fill_value=0,
    )
    return donation_matrix
|
||
def get_qf_matching(algo, donation_df, matching_cap_percent, matching_amount, cluster_df=None):
    """Compute capped matching payouts for every project under a QF variant.

    Args:
        algo: one of ``'donation_profile_clustermatch'``, ``'COCM'`` or
            ``'pairwise'``; any other value falls back to standard QF.
        donation_df: wallets x projects frame of donation amounts.
        matching_cap_percent: maximum share of the pool a single project may
            receive, on a 0-100 scale (values >= 100 disable the cap).
        matching_amount: total matching pool to distribute.
        cluster_df: wallets x clusters membership frame; required for
            ``'COCM'``, ignored by the other variants.

    Returns:
        DataFrame with columns ``project_name``, ``matching_amount`` (payout)
        and ``matching_percent`` (share of the pool, 0-100).

    Raises:
        ValueError: if ``algo`` is ``'COCM'`` and no ``cluster_df`` is given.
    """
    projects = donation_df.columns
    if algo == 'donation_profile_clustermatch':
        funding = donation_profile_clustermatch(donation_df)
    elif algo == 'COCM':
        if cluster_df is None:
            # Fail with a clear message instead of an AttributeError raised
            # from deep inside the alignment step.
            raise ValueError("cluster_df is required when algo is 'COCM'")
        funding = COCM(donation_df, cluster_df)
    elif algo == 'pairwise':
        funding = pairwise(donation_df)
    else:
        funding = standard_qf(donation_df)

    total_money = sum(funding.values())
    if total_money:
        funding_normalized = {p: funding[p] / total_money for p in projects}
    else:
        # Nothing was matched at all: give every project a zero share rather
        # than dividing by zero (ZeroDivisionError or NaN shares).
        funding_normalized = {p: 0.0 for p in projects}

    # Create DataFrame with 'project_name' and 'matching_amount' columns
    result = pd.DataFrame(list(funding_normalized.items()), columns=['project_name', 'matching_amount'])
    # Apply the cap to the 'matching_amount' column (shares are fractions,
    # so the 0-100 cap is converted to a fraction first)
    if matching_cap_percent < 100.0:
        result['matching_amount'] = check_matching_cap(result['matching_amount'], matching_cap_percent / 100)
    # Report the share as a percentage, then scale it by the matching pool
    result['matching_percent'] = result['matching_amount'] * 100
    result['matching_amount'] = result['matching_amount'] * matching_amount
    return result
Oops, something went wrong.