prepareSnapshot.py (forked from DefiLlama/yield-server)
import sys
import ast
import pandas as pd
# How I ran this:
# Step 1) DOWNLOAD FULL HISTORY from MongoDB
#         (a hypothetical export call is sketched below)
# Step 2) PREPARE DATA
#         run this python script, which:
#         - filters apy >= 0, tvlUsd >= 1000
#         - casts tvlUsd to integer
#         - rounds the apy columns to 5 decimals
#         - ...
#         - stores 3 outputs: the full hourly history (for yield), the last
#           value of each day (for stat), and the last value overall (for config)
# Step 3) CREATE UUIDS
#         based on the output from Step 2), run bootstrapUUID.js, which creates
#         a unique uuid for each unique pool and stores it locally (a Python
#         sketch of the same idea follows prepare_snapshot below)
# Step 4) CREATE THE POSTGRES TABLES
#         run the create scripts starting with config; order doesn't matter for
#         the rest (a hypothetical table sketch closes this file)
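# Step 1, as a rough sketch only -- the connection string, collection name, and
# field list below are assumptions, not the actual values used:
#
#   mongoexport --uri "$MONGO_URI" --collection yield --type=csv \
#       --fields timestamp,pool,project,tvlUsd,apy,apyBase,apyReward,underlyingTokens,rewardTokens \
#       --out full_history.csv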


def replaceFunc(x: str) -> str:
    """Drop JSON nulls from a stringified token array."""
    if x in ("[null]", "[null,null]"):
        return "[]"
    # remove every null element, wherever it sits in the array
    return x.replace("null,", "").replace(",null", "")
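# For example (hypothetical token strings):
#   replaceFunc('["0xabc",null]')  -> '["0xabc"]'
#   replaceFunc('[null,null]')     -> '[]'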


def prepare_snapshot(filename: str) -> None:
    df = pd.read_csv(filename)

    # clean none/null values inside the stringified underlyingTokens arrays
    df.loc[df["underlyingTokens"].notnull(), "underlyingTokens"] = df.loc[
        df["underlyingTokens"].notnull(), "underlyingTokens"
    ].apply(lambda x: replaceFunc(x) if "null" in x else x)

    # remove rows where all 3 apy fields are null
    df = df[
        ~((df["apy"].isnull()) & (df["apyReward"].isnull()) & (df["apyBase"].isnull()))
    ]

    # keep apy within boundary values (non-negative, capped at 1e6)
    df = df[(df["apy"] >= 0) & (df["apy"] <= 1e6)]

    # keep tvlUsd within boundary values
    df = df[(df["tvlUsd"] >= 1000) & (df["tvlUsd"] <= 2e10)]

    # remove pools and projects on the exclusion list
    exclude_pools = [
        "0xf4bfe9b4ef01f27920e490cea87fe2642a8da18d",
        "DWmAv5wMun4AHxigbwuJygfmXBBe9WofXAtrMCRJExfb",
        "ripae-seth-weth-42161",
        "ripae-peth-weth-42161",
        "0x3eed430cd45c5e2b45aa1adc609cc77c6728d45b",
        "0x3c42B0f384D2912661C940d46cfFE1CD10F1c66F-ethereum",
        "0x165ab553871b1a6b3c706e15b6a7bb29a244b2f3",
    ]
    df = df[~df["pool"].isin(exclude_pools)]
    df = df[df["project"] != "koyo-finance"]

    # cast dtypes and round
    df["tvlUsd"] = df["tvlUsd"].astype(int)
    apy_columns = ["apy", "apyBase", "apyReward"]
    df[apy_columns] = df[apy_columns].round(5)

    # 1. hourly: the full history (for the yield table)
    df["timestamp"] = pd.to_datetime(df["timestamp"])
    df = df.sort_values(["pool", "timestamp"], ascending=True).reset_index(drop=True)
    prefix = "yield_snapshot"
    df.to_csv(f"{prefix}_hourly.csv", index=False)

    # 2. daily: the last value of each pool per day (for stat)
    df_daily = (
        df.groupby(["pool", pd.Grouper(key="timestamp", freq="1D")])
        .last()
        .reset_index()
    )
    df_daily.to_json(f"{prefix}_daily.json", orient="records")

    # 3. last: the most recent value per pool (for config)
    df_last = (
        df_daily.sort_values(["pool", "timestamp"], ascending=True)
        .groupby("pool")
        .last()
        .reset_index()
    )

    # cast the stringified arrays back to python lists
    def to_list(x):
        return ast.literal_eval(x) if isinstance(x, str) else x

    df_last["underlyingTokens"] = df_last["underlyingTokens"].apply(to_list)
    df_last["rewardTokens"] = df_last["rewardTokens"].apply(to_list)
    df_last.to_json(f"{prefix}_last.json", orient="records")


if __name__ == "__main__":
    prepare_snapshot(sys.argv[1])
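

# Usage, assuming the MongoDB export from Step 1 was saved as full_history.csv:
#   python prepareSnapshot.py full_history.csv
#
# Step 4 then loads the three outputs into Postgres. A hypothetical shape for
# the config table (the actual DDL lives in the separate create scripts; table
# and column names here are assumptions based on the fields written above):
#   CREATE TABLE config (
#       config_id uuid PRIMARY KEY,
#       pool text UNIQUE NOT NULL,
#       project text NOT NULL,
#       "underlyingTokens" text[],
#       "rewardTokens" text[]
#   );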