Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fixes #355: [Pdr acc] CORS, Debug Mode, the first day issue on the calculation #358

Merged
merged 24 commits into from
Nov 15, 2023
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Prev Previous commit
Next Next commit
calculation based on slot data
  • Loading branch information
kdetry committed Nov 2, 2023
commit 8519abd0d10d08b63f856265313a71c8f65ec369
65 changes: 49 additions & 16 deletions pdr_backend/accuracy/app.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,38 +2,71 @@
import json
from flask import Flask, jsonify

from pdr_backend.util.predictoor_stats import get_endpoint_statistics
from pdr_backend.util.subgraph_predictions import (
get_all_predictions,
get_all_contracts,
FilterMode,
)
from pdr_backend.util.subgraph_predictions import get_all_contracts
from pdr_backend.accuracy.utils.get_start_end_params import get_start_end_params
from pdr_backend.util.subgraph_slot import calculate_statistics_for_all_assets
from pdr_backend.util.subgraph_predictions import get_contract_informations

app = Flask(__name__)
JSON_FILE_PATH = "pdr_backend/accuracy/output/predictions_data.json"
JSON_FILE_PATH = "pdr_backend/accuracy/output/accuracy_data.json"


def save_statistics_to_file():
while True:
try:
network_param = "mainnet" # or 'testnet' depending on your preference

start_ts_param, end_ts_param = get_start_end_params()
contract_addresses = get_all_contracts(
"0x4ac2e51f9b1b0ca9e000dfe6032b24639b172703", network_param
)
predictions = get_all_predictions(
start_ts_param,
end_ts_param,
contract_addresses,
network_param,
filter_mode=FilterMode.CONTRACT,

contract_information = get_contract_informations(
contract_addresses, network_param
)
statistics = get_endpoint_statistics(predictions)

statistic_types = [
{
"alias": "5m",
"seconds_per_epoch": 300,
},
{
"alias": "1h",
"seconds_per_epoch": 3600,
},
]

output = []

print("contract_information", len(contract_information))
for statistic_type in statistic_types:

seconds_per_epoch = statistic_type["seconds_per_epoch"]
contracts = list(
filter(
lambda item, spe=seconds_per_epoch: int(item["seconds_per_epoch"]) == spe,
contract_information,
)
)

start_ts_param, end_ts_param = get_start_end_params(seconds_per_epoch)

contract_ids = [contract["id"] for contract in contracts]
print("contract_ids", len(contract_ids))

# Get statistics for all contracts
statistics = calculate_statistics_for_all_assets(
contract_ids, start_ts_param, end_ts_param, network_param
)

output.append(
{
"alias": statistic_type["alias"],
"statistics": statistics,
}
)

with open(JSON_FILE_PATH, "w") as f:
json.dump({"statistics": statistics}, f)
json.dump(output, f)

print("Data saved to JSON")
except Exception as e:
Expand Down
7 changes: 5 additions & 2 deletions pdr_backend/accuracy/utils/get_start_end_params.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
from typing import Tuple


def get_start_end_params() -> Tuple[int, int]:
def get_start_end_params(contract_timeframe: str) -> Tuple[int, int]:
    """
    Return a (start_ts, end_ts) pair of Unix timestamps for the stats window.

    The window length depends on the contract timeframe: two weeks for the
    5-minute timeframe, four weeks otherwise.

    Args:
        contract_timeframe: Timeframe alias ("5m" or "1h"). For robustness,
            the 5m seconds-per-epoch value (300 or "300") is also accepted,
            because the accuracy app passes seconds_per_epoch rather than
            the alias string.

    Returns:
        Tuple[int, int]: (start_ts, end_ts)
    """
    # Capture "now" once so both timestamps derive from the same instant.
    now = datetime.utcnow()
    end_ts = int(now.timestamp())

    # Treat 300 (seconds per 5m epoch) the same as the "5m" alias.
    is_five_min = str(contract_timeframe) in ("5m", "300")
    time_delta = timedelta(weeks=2) if is_five_min else timedelta(weeks=4)
    start_ts = int((now - time_delta).timestamp())

    return start_ts, end_ts
36 changes: 36 additions & 0 deletions pdr_backend/util/subgraph_predictions.py
Original file line number Diff line number Diff line change
Expand Up @@ -173,3 +173,39 @@ def get_all_contracts(owner_address: str, network: str) -> List[str]:
contracts = [contract["id"] for contract in contract_data]

return contracts


def get_contract_informations(
    contract_addresses: List[str], network: str
) -> List[dict]:
    """
    Fetch id and seconds-per-epoch for the given predictoor contracts.

    Args:
        contract_addresses: Contract addresses to look up.
        network: Subgraph network, either "mainnet" or "testnet".

    Returns:
        List[dict]: One {"id": ..., "seconds_per_epoch": ...} per contract.

    Raises:
        Exception: If the network is invalid or the subgraph returns no data.
    """
    if network not in ("mainnet", "testnet"):
        raise Exception("Invalid network, pick mainnet or testnet")

    # json.dumps renders a double-quoted JSON array, which is valid GraphQL.
    query = """
        {
            predictContracts(where: {
                id_in: %s
            }){
                id
                secondsPerEpoch
            }
        }
        """ % json.dumps(
        contract_addresses
    )

    result = query_subgraph(get_subgraph_url(network), query, timeout=20.0)

    if "data" not in result:
        raise Exception("Error fetching contracts: No data returned")

    # Flatten the subgraph records into plain dicts for the caller.
    contract_data = result["data"]["predictContracts"]
    contracts = [
        {"id": contract["id"], "seconds_per_epoch": contract["secondsPerEpoch"]}
        for contract in contract_data
    ]

    return contracts
205 changes: 205 additions & 0 deletions pdr_backend/util/subgraph_slot.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,205 @@
from typing import List, Dict, Any, TypedDict

from pdr_backend.util.subgraph import query_subgraph
from pdr_backend.util.subgraph_predictions import get_subgraph_url


class Slot(TypedDict):
    """One prediction slot as returned by the predictSlots subgraph query."""

    # "<contract address>-<slot timestamp>" — downstream code splits on "-"
    id: str
    slot: str
    # Submitted truevals; may be empty if the slot is not yet resolved
    trueValues: List[Dict[str, Any]]
    # Stake sums for the round; Up is presumably the up-side sum — TODO confirm
    roundSumStakesUp: float
    roundSumStakes: float


def get_predict_slots_query(
    asset_ids: List[str], initial_slot: int, last_slot: int, first: int, skip: int
) -> str:
    """
    Build the GraphQL query for one page of predictSlots.

    Args:
        asset_ids: Prediction contract addresses to filter on.
        initial_slot: Upper bound (inclusive) on slot timestamp (slot_lte).
        last_slot: Lower bound (inclusive) on slot timestamp (slot_gte).
        first: Page size.
        skip: Pagination offset.

    Returns:
        str: The GraphQL query string.
    """
    # Render the Python list as a GraphQL array: double quotes, not single.
    # (The original also "replaced" [ with [ and ] with ] — no-ops, removed.)
    asset_ids_str = str(asset_ids).replace("'", '"')

    return """
        query {
            predictSlots (
            first: %s
            skip: %s
            where: {
                slot_lte: %s
                slot_gte: %s
                predictContract_in: %s
            }
            ) {
            id
            slot
            trueValues {
                id
                trueValue
            }
            roundSumStakesUp
            roundSumStakes
            }
        }
        """ % (
        first,
        skip,
        initial_slot,
        last_slot,
        asset_ids_str,
    )


SECONDS_IN_A_DAY = 86400


def get_slots(
    addresses: List[str],
    end_ts_param: int,
    start_ts_param: int,
    skip: int,
    slots: List[Slot],
    network: str = "mainnet",
) -> List[Slot]:
    """
    Fetch all predictSlots for the given contracts within the time window,
    paginating through the subgraph until a short page is returned.

    Args:
        addresses: Prediction contract addresses.
        end_ts_param: Upper bound (inclusive) on slot timestamp.
        start_ts_param: Lower bound (inclusive) on slot timestamp.
        skip: Initial pagination offset.
        slots: Accumulator list; results are appended to it (a fresh list
            is used when it is falsy).
        network: "mainnet" or "testnet".

    Returns:
        List[Slot]: The accumulated slot records.
    """
    slots = slots or []
    records_per_page = 1000

    # Iterate instead of recursing: a long window can span many pages and
    # recursion depth would grow with the page count. The debug print of the
    # query has been removed.
    while True:
        query = get_predict_slots_query(
            addresses,
            end_ts_param,
            start_ts_param,
            records_per_page,
            skip,
        )

        result = query_subgraph(
            get_subgraph_url(network),
            query,
            timeout=20.0,
        )

        new_slots = result["data"]["predictSlots"] or []
        slots.extend(new_slots)

        # A short page means the results are exhausted.
        if len(new_slots) < records_per_page:
            return slots
        skip += records_per_page


def fetch_slots_for_all_assets(
    asset_ids: List[str],
    start_ts_param: int,
    end_ts_param: int,
    network: str = "mainnet",
) -> Dict[str, List[Slot]]:
    """
    Fetch every slot in the window and group the results by asset id.

    The slot id has the form "<asset id>-<slot timestamp>", so the asset id
    is recovered from the part before the first "-".

    Returns:
        Dict[str, List[Slot]]: asset id -> that asset's slots.
    """
    fetched = get_slots(asset_ids, end_ts_param, start_ts_param, 0, [], network)

    grouped: Dict[str, List[Slot]] = {}
    for record in fetched:
        asset_id = record["id"].split("-")[0]
        grouped.setdefault(asset_id, []).append(record)

    return grouped


def calculate_prediction_prediction_result(
    round_sum_stakes_up: float, round_sum_stakes: float
):
    """
    Derive the aggregate predicted direction for a slot from its stake sums.

    Returns:
        dict: {"direction": bool} — True when the "up" stake sum strictly
        exceeds the total stake sum.

    NOTE(review): this compares the up-side stake against the *total* stake,
    not half of it — confirm that is the intended threshold.
    """
    predicted_up = round_sum_stakes_up > round_sum_stakes
    return {"direction": predicted_up}


# Function to process individual slot data
def process_single_slot(slot: Slot, end_of_previous_day_timestamp: int):
staked_yesterday = staked_today = 0.0
correct_predictions_count = slots_evaluated = 0
# split the id to get the slot timestamp
timestamp = int(slot["id"].split("-")[1])
# print("timestamp", timestamp)
# print("end_of_previous_day_timestamp", end_of_previous_day_timestamp)
# print("slot", slot)
# print("------------------")
if timestamp < end_of_previous_day_timestamp:
staked_yesterday += float(slot["roundSumStakes"])
else:
staked_today += float(slot["roundSumStakes"])
if float(slot["roundSumStakes"]) == 0:
return None

prediction_result = calculate_prediction_prediction_result(
slot["roundSumStakesUp"], slot["roundSumStakes"]
)
true_values: List[Dict[str, Any]] = slot.get("trueValues", [])
true_value = true_values[0]["trueValue"] if true_values else None
if true_values and prediction_result["direction"] == (1 if true_value else 0):
correct_predictions_count += 1
slots_evaluated += 1

return staked_yesterday, staked_today, correct_predictions_count, slots_evaluated


# Function to aggregate statistics across all slots for an asset
def aggregate_statistics(slots: List[Slot], end_of_previous_day_timestamp: int):
total_staked_yesterday = (
total_staked_today
) = total_correct_predictions = total_slots_evaluated = 0
for slot in slots:
slot_results = process_single_slot(slot, end_of_previous_day_timestamp)
if slot_results:
(
staked_yesterday,
staked_today,
correct_predictions_count,
slots_evaluated,
) = slot_results
total_staked_yesterday += staked_yesterday
total_staked_today += staked_today
total_correct_predictions += correct_predictions_count
total_slots_evaluated += slots_evaluated
return (
total_staked_yesterday,
total_staked_today,
total_correct_predictions,
total_slots_evaluated,
)


# Function to calculate stats for all assets
def calculate_statistics_for_all_assets(
asset_ids: List[str],
start_ts_param: int,
end_ts_param: int,
network: str = "mainnet",
):
slots_by_asset = fetch_slots_for_all_assets(
asset_ids, start_ts_param, end_ts_param, network
)
overall_stats = {}
for asset_id, slots in slots_by_asset.items():
(
staked_yesterday,
staked_today,
correct_predictions_count,
slots_evaluated,
) = aggregate_statistics(slots, end_ts_param - SECONDS_IN_A_DAY)
average_accuracy = (
0
if correct_predictions_count == 0
else (correct_predictions_count / slots_evaluated) * 100
)
overall_stats[asset_id] = {
"average_accuracy": average_accuracy,
"total_staked_yesterday": staked_yesterday,
"total_staked_today": staked_today,
}
return overall_stats
Loading