Commit 118508a (1 parent: 4edcd36). Showing 12 changed files with 677 additions and 165 deletions.
@@ -0,0 +1,23 @@
relay_chain: Polkadot
chain: Hydration
wss: wss://hydradx-rpc.dwellir.com
databases:
  # - type: postgres
  #   host: localhost
  #   port: 5432
  #   name: block_ingest_db
  #   user: db_user
  #   password: db_password
  # - type: duckdb
  #   path: /path/to/duckdb/database
  # - type: mysql
  #   host: mysql_host
  #   port: 3306
  #   name: block_db
  #   user: mysql_user
  #   password: mysql_password
  - type: bigquery
    project_id: parity-data-infra-evaluation
    credentials_path: /Users/pranaypatil/Downloads/parity-data-infra-evaluation-1d7b2ec60ac2.json
    dataset: pranay_playground
    table: blocks_hydration
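A minimal sketch (not part of this commit) of how the same config could be read from Python, assuming PyYAML is available; the variable names are illustrative:

import yaml

# Load the ingest configuration (same keys as the config above).
with open("config.yaml") as f:
    cfg = yaml.safe_load(f)

chain = cfg["chain"]            # e.g. "Hydration"
wss = cfg["wss"]                # RPC endpoint to ingest from
bq = cfg["databases"][0]        # the only uncommented database entry (bigquery)
print(bq["project_id"], bq["dataset"], bq["table"])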
@@ -0,0 +1,52 @@
#!/bin/bash

# Check if yq is installed
if ! command -v yq &> /dev/null; then
    echo "yq is not installed. Please install it to parse YAML files."
    exit 1
fi

# Read configuration from config.yaml
RELAY_CHAIN=$(yq eval '.relay_chain' config.yaml)
CHAIN=$(yq eval '.chain' config.yaml)
WSS=$(yq eval '.wss' config.yaml)

# Database configuration
DB_TYPE=$(yq eval '.databases[0].type' config.yaml)
DB_PROJECT=$(yq eval '.databases[0].project_id' config.yaml)
DB_CRED_PATH=$(yq eval '.databases[0].credentials_path' config.yaml)
DB_DATASET=$(yq eval '.databases[0].dataset' config.yaml)
DB_TABLE=$(yq eval '.databases[0].table' config.yaml)

# Start Substrate API Sidecar
# echo "Starting Substrate API Sidecar..."
# docker run -d --rm --read-only -e SAS_SUBSTRATE_URL="$WSS" -p 8080:8080 parity/substrate-api-sidecar:latest
# if [ $? -eq 0 ]; then
#     echo "Substrate API Sidecar started successfully."
# else
#     echo "Failed to start Substrate API Sidecar."
#     exit 1
# fi

# Default values
DB_PATH="blocks.db"

cd ingest/

# Start the main.py script
echo "Starting main.py script..."
/usr/bin/python3 main.py --chain "$CHAIN" --relay_chain "$RELAY_CHAIN" --wss "$WSS" --db_path "$DB_PATH" --database "$DB_TYPE" --db_project "$DB_PROJECT" --db_cred_path "$DB_CRED_PATH" --db_dataset "$DB_DATASET" --db_table "$DB_TABLE" &

sleep 30

# Start the Streamlit app
# echo "Starting Streamlit app..."
# /usr/bin/python3 -m streamlit run Home.py --server.port 8501 -- --db_path "$DB_PATH" --chain "$CHAIN" --relay_chain "$RELAY_CHAIN" --database "$DB_TYPE" --db_project "$DB_PROJECT" --db_cred_path "$DB_CRED_PATH" --db_dataset "$DB_DATASET" --db_table "$DB_TABLE" &

# # Wait for all background processes to finish
# wait

# echo "Starting the services....this will take a minute...."
# sleep 60
# echo "Both services are now running. You can access Substrate API Sidecar at http://localhost:8080 and Block Ingest Service at http://localhost:8501"
@@ -0,0 +1,168 @@
import os
from google.cloud import bigquery
from google.oauth2 import service_account


def connect_to_bigquery(project_id, credentials_path):
    """
    Connect to BigQuery.
    Args:
        project_id (str): The Google Cloud project ID.
        credentials_path (str): Path to the service account credentials JSON file.
    Returns:
        google.cloud.bigquery.client.Client: A BigQuery client.
    """
    if not os.path.exists(credentials_path):
        raise FileNotFoundError(f"Credentials file not found at {credentials_path}")

    credentials = service_account.Credentials.from_service_account_file(
        credentials_path,
        scopes=["https://www.googleapis.com/auth/cloud-platform"],
    )

    return bigquery.Client(credentials=credentials, project=project_id)

def create_blocks_table(client, dataset_id, table_id):
    """
    Create the blocks table if it doesn't exist.
    Args:
        client (google.cloud.bigquery.client.Client): A BigQuery client.
        dataset_id (str): The ID of the dataset to create the table in.
        table_id (str): The ID of the table to create.
    """
    schema = [
        bigquery.SchemaField("relay_chain", "STRING", mode="REQUIRED"),
        bigquery.SchemaField("chain", "STRING", mode="REQUIRED"),
        bigquery.SchemaField("timestamp", "INTEGER", mode="REQUIRED"),
        bigquery.SchemaField("number", "STRING", mode="REQUIRED"),
        bigquery.SchemaField("hash", "STRING", mode="REQUIRED"),
        bigquery.SchemaField("parentHash", "STRING", mode="REQUIRED"),
        bigquery.SchemaField("stateRoot", "STRING", mode="REQUIRED"),
        bigquery.SchemaField("extrinsicsRoot", "STRING", mode="REQUIRED"),
        bigquery.SchemaField("authorId", "STRING", mode="REQUIRED"),
        bigquery.SchemaField("finalized", "BOOLEAN", mode="REQUIRED"),
        bigquery.SchemaField("extrinsics", "RECORD", mode="REPEATED", fields=[
            bigquery.SchemaField("method", "RECORD", fields=[
                bigquery.SchemaField("pallet", "STRING", mode="REQUIRED"),
                bigquery.SchemaField("method", "STRING", mode="REQUIRED")
            ]),
            bigquery.SchemaField("signature", "RECORD", fields=[
                bigquery.SchemaField("signature", "STRING"),
                bigquery.SchemaField("signer", "STRING")
            ]),
            bigquery.SchemaField("nonce", "STRING"),
            bigquery.SchemaField("args", "STRING"),
            bigquery.SchemaField("tip", "STRING"),
            bigquery.SchemaField("hash", "STRING"),
            bigquery.SchemaField("info", "STRING"),
            bigquery.SchemaField("era", "RECORD", fields=[
                bigquery.SchemaField("immortalEra", "STRING"),
                bigquery.SchemaField("mortalEra", "STRING", mode="REPEATED")
            ]),
            bigquery.SchemaField("events", "RECORD", mode="REPEATED", fields=[
                bigquery.SchemaField("method", "RECORD", fields=[
                    bigquery.SchemaField("pallet", "STRING"),
                    bigquery.SchemaField("method", "STRING")
                ]),
                bigquery.SchemaField("data", "STRING")
            ]),
            bigquery.SchemaField("success", "BOOLEAN"),
            bigquery.SchemaField("paysFee", "BOOLEAN"),
        ]),
        bigquery.SchemaField("onFinalize", "RECORD", fields=[
            bigquery.SchemaField("events", "RECORD", mode="REPEATED", fields=[
                bigquery.SchemaField("method", "RECORD", fields=[
                    bigquery.SchemaField("pallet", "STRING"),
                    bigquery.SchemaField("method", "STRING")
                ]),
                bigquery.SchemaField("data", "STRING")
            ]),
        ]),
        bigquery.SchemaField("onInitialize", "RECORD", fields=[
            bigquery.SchemaField("events", "RECORD", mode="REPEATED", fields=[
                bigquery.SchemaField("method", "RECORD", fields=[
                    bigquery.SchemaField("pallet", "STRING"),
                    bigquery.SchemaField("method", "STRING")
                ]),
                bigquery.SchemaField("data", "STRING")
            ]),
        ]),
        bigquery.SchemaField("logs", "RECORD", mode="REPEATED", fields=[
            bigquery.SchemaField("type", "STRING"),
            bigquery.SchemaField("index", "STRING"),
            bigquery.SchemaField("value", "STRING")
        ])
    ]

    table = bigquery.Table(f"{client.project}.{dataset_id}.{table_id}", schema=schema)
    table = client.create_table(table, exists_ok=True)
    print(f"Created table {table.project}.{table.dataset_id}.{table.table_id}")

def insert_block(client, dataset_id, table_id, block_data):
    """
    Insert a block into the BigQuery table.
    Args:
        client (google.cloud.bigquery.client.Client): A BigQuery client.
        dataset_id (str): The ID of the dataset containing the table.
        table_id (str): The ID of the table to insert into.
        block_data (dict): The block data to insert.
    """
    table_ref = client.dataset(dataset_id).table(table_id)
    errors = client.insert_rows_json(table_ref, [block_data])
    if errors:
        print(f"Encountered errors while inserting rows: {errors}")
    else:
        print(f"Inserted 1 row into {dataset_id}.{table_id}")

def update_block(client, dataset_id, table_id, block_number, update_data):
    """
    Update a block in the BigQuery table.
    Args:
        client (google.cloud.bigquery.client.Client): A BigQuery client.
        dataset_id (str): The ID of the dataset containing the table.
        table_id (str): The ID of the table to update.
        block_number (str): The block number to update.
        update_data (dict): The data to update the block with.
    """
    query = f"""
        UPDATE `{client.project}.{dataset_id}.{table_id}`
        SET {', '.join([f"{k} = @{k}" for k in update_data.keys()])}
        WHERE number = @block_number
    """

    # Every parameter is bound as STRING, so this only updates string-typed
    # columns; other column types would need matching parameter types.
    job_config = bigquery.QueryJobConfig(
        query_parameters=[
            bigquery.ScalarQueryParameter("block_number", "STRING", block_number),
            *[bigquery.ScalarQueryParameter(k, "STRING", v) for k, v in update_data.items()]
        ]
    )

    query_job = client.query(query, job_config=job_config)
    query_job.result()

    print(f"Updated block {block_number} in {dataset_id}.{table_id}")

def query(client, query_str):
    """
    Execute a query on BigQuery and return the results as a DataFrame.
    Args:
        client (google.cloud.bigquery.client.Client): A BigQuery client.
        query_str (str): The query string to execute.
    Returns:
        pandas.DataFrame: The query results as a DataFrame.
    """
    query_job = client.query(query_str)
    results = query_job.result()
    df = results.to_dataframe()

    return df
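A short usage sketch of the helpers above, using the BigQuery settings from the config file; the credentials path and block payload are placeholders, and the module's file name is not shown in this diff:

# With the helper functions above in scope:
client = connect_to_bigquery(
    project_id="parity-data-infra-evaluation",
    credentials_path="/path/to/service-account.json",  # placeholder path
)
create_blocks_table(client, "pranay_playground", "blocks_hydration")

# Minimal illustrative row; a real block from Sidecar carries many more fields.
insert_block(client, "pranay_playground", "blocks_hydration", {
    "relay_chain": "Polkadot",
    "chain": "Hydration",
    "timestamp": 1700000000,
    "number": "5000000",
    "hash": "0xplaceholder",
    "parentHash": "0xplaceholder",
    "stateRoot": "0xplaceholder",
    "extrinsicsRoot": "0xplaceholder",
    "authorId": "0xplaceholder",
    "finalized": True,
})

df = query(client, "SELECT number, finalized FROM `parity-data-infra-evaluation.pranay_playground.blocks_hydration` LIMIT 10")
print(df)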