Skip to content

Commit

Permalink
GHA CAR daily report (#201)
Browse files Browse the repository at this point in the history
* faithful report

* fix

* fix

* Automated Change

* fix

* fix

* fix

* fix

* fix

* fix

* fix

* fix

* fix

* fix

* fix

* fix

* fix

* fix

* fix

* move to python, parallelize

* fix

* fix

* fix

* fix

* remove old file

* run every 1h

* every 4h

---------

Co-authored-by: Lusitaniae <Lusitaniae@users.noreply.github.com>
  • Loading branch information
Lusitaniae and Lusitaniae authored Dec 18, 2024
1 parent a9ca636 commit 29af2e8
Show file tree
Hide file tree
Showing 2 changed files with 270 additions and 0 deletions.
229 changes: 229 additions & 0 deletions .github/faithful-data-report.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,229 @@
#!/usr/bin/env python3
import asyncio
import aiohttp
from typing import Dict, Optional
from dataclasses import dataclass

@dataclass
class EpochData:
    """One report row's worth of information about a single epoch.

    Only ``epoch`` is mandatory. Every other field is a string that defaults
    to the sentinel ``"n/a"``, which the report treats as "not available".
    """

    # Epoch number this record describes.
    epoch: int

    # CAR archive: its URL, and its size as a whole number of GB (as a string).
    car: str = "n/a"
    # Text of the epoch's .sha256 file, and the URL it was fetched from.
    sha: str = "n/a"
    sha_url: str = "n/a"
    size: str = "n/a"

    # Text of the PoH check log, and the URL it was fetched from.
    poh: str = "n/a"
    poh_url: str = "n/a"

    # Text of the tx-metadata check log, and the URL it was fetched from.
    txmeta: str = "n/a"
    txmeta_url: str = "n/a"

    # URL of the Filecoin deals CSV.
    deals: str = "n/a"

    # Marker URL set when all index files exist, and their combined size in GB.
    indices: str = "n/a"
    indices_size: str = "n/a"

class FaithfulDataReport:
    """Print a Markdown table describing the data published for each epoch.

    For every epoch from the current one down to 0, the report probes over
    HTTP for the CAR file, its SHA-256 file, the PoH and tx-metadata check
    logs, the index files and the Filecoin deals CSV, then prints one table
    row per epoch to stdout.
    """

    # Failures that mean "this resource is unavailable" for a best-effort
    # probe. Deliberately not a bare ``except``: task cancellation and
    # KeyboardInterrupt must keep propagating. ValueError covers bodies that
    # cannot be decoded and a non-numeric Content-Length header.
    _REQUEST_ERRORS = (aiohttp.ClientError, asyncio.TimeoutError, OSError, ValueError)

    # Divisor used to express byte counts as the "GB" shown in the report.
    _BYTES_PER_GB = 1024 * 1024 * 1024

    def __init__(self):
        """Set the base URLs of the file host and of the deals CDN."""
        self.host = "https://files.old-faithful.net"
        self.deals_host = "https://filecoin-car-storage-cdn.b-cdn.net"

    async def check_url(self, session: aiohttp.ClientSession, url: str) -> bool:
        """Return True if a HEAD request to *url* (following redirects) yields 200.

        Any request failure is reported as False rather than raised.
        """
        try:
            async with session.head(url, allow_redirects=True) as response:
                return response.status == 200
        except self._REQUEST_ERRORS:
            return False

    async def fetch_text(self, session: aiohttp.ClientSession, url: str) -> Optional[str]:
        """Return the body of *url* as text, or None unless the GET answers 200.

        Request failures and undecodable bodies are also reported as None.
        """
        try:
            async with session.get(url) as response:
                if response.status == 200:
                    return await response.text()
        except self._REQUEST_ERRORS:
            pass
        return None

    async def _get_size_bytes(self, session: aiohttp.ClientSession, url: str) -> Optional[int]:
        """Return the Content-Length of *url* in bytes, or None if unknown.

        Redirects are followed so that the answer agrees with ``check_url``:
        a file that "exists" through a redirect also gets a size.
        """
        try:
            async with session.head(url, allow_redirects=True) as response:
                if response.status != 200:
                    return None
                content_length = response.headers.get('content-length')
                # A missing header means the size is unknown, not zero.
                if content_length is None:
                    return None
                return int(content_length)
        except self._REQUEST_ERRORS:
            return None

    async def get_size(self, session: aiohttp.ClientSession, url: str) -> str:
        """Return the size of *url* rounded to whole GB, as a string.

        Returns ``"n/a"`` when the size cannot be determined.
        """
        size_bytes = await self._get_size_bytes(session, url)
        if size_bytes is None:
            return "n/a"
        return str(round(size_bytes / self._BYTES_PER_GB))

    async def _index_urls(self, session: aiohttp.ClientSession, epoch: int) -> Optional[list]:
        """Return the URLs of every index file required for *epoch*.

        Most index file names embed the CID read from ``epoch-<epoch>.cid``;
        None is returned when that CID cannot be fetched or is blank.
        """
        cid_url = f"{self.host}/{epoch}/epoch-{epoch}.cid"
        cid_text = await self.fetch_text(session, cid_url)
        # Strip surrounding whitespace (e.g. a trailing newline) so it does
        # not end up inside the generated file names.
        bafy = cid_text.strip() if cid_text else ""
        if not bafy:
            return None

        index_files = [
            f"epoch-{epoch}-{bafy}-mainnet-cid-to-offset-and-size.index",
            f"epoch-{epoch}-{bafy}-mainnet-sig-to-cid.index",
            f"epoch-{epoch}-{bafy}-mainnet-sig-exists.index",
            f"epoch-{epoch}-{bafy}-mainnet-slot-to-cid.index",
            f"epoch-{epoch}-gsfa.index.tar.zstd",
        ]
        return [f"{self.host}/{epoch}/{file}" for file in index_files]

    async def get_indices(self, session: aiohttp.ClientSession, epoch: int) -> str:
        """Return the indices marker URL if every index file exists, else ``"n/a"``."""
        urls = await self._index_urls(session, epoch)
        if urls is None:
            return "n/a"

        checks = await asyncio.gather(*(self.check_url(session, url) for url in urls))
        return f"{self.host}/{epoch}/epoch-{epoch}-indices" if all(checks) else "n/a"

    async def get_indices_size(self, session: aiohttp.ClientSession, epoch: int) -> str:
        """Return the combined size of the index files in whole GB, as a string.

        Files whose size is unknown count as 0. Returns ``"n/a"`` when the CID
        is unavailable or the total rounds to 0 GB.
        """
        urls = await self._index_urls(session, epoch)
        if urls is None:
            return "n/a"

        sizes = await asyncio.gather(*(self._get_size_bytes(session, url) for url in urls))

        # Add up raw byte counts and round once, so per-file rounding errors
        # do not accumulate in the total.
        total_bytes = sum(size for size in sizes if size is not None)
        total_gb = round(total_bytes / self._BYTES_PER_GB)
        return str(total_gb) if total_gb > 0 else "n/a"

    async def get_deals(self, session: aiohttp.ClientSession, epoch: int) -> str:
        """Return the deals CSV URL if the file has more than one line, else ``"n/a"``."""
        deals_url = f"{self.deals_host}/{epoch}/deals.csv"
        deals_content = await self.fetch_text(session, deals_url)

        # A single line is presumably just the CSV header, i.e. no deals yet.
        if deals_content and len(deals_content.splitlines()) > 1:
            return deals_url
        return "n/a"

    async def get_epoch_data(self, session: aiohttp.ClientSession, epoch: int) -> EpochData:
        """Collect everything the report shows for *epoch*.

        If the CAR file itself is missing, no further requests are made and
        an all-``"n/a"`` record is returned.
        """
        car_url = f"{self.host}/{epoch}/epoch-{epoch}.car"
        sha_url = f"{self.host}/{epoch}/epoch-{epoch}.sha256"
        poh_url = f"{self.host}/{epoch}/poh-check.log"
        txmeta_url = f"{self.host}/{epoch}/tx-metadata-check.log"

        # Check if CAR exists first
        car_exists = await self.check_url(session, car_url)
        if not car_exists:
            return EpochData(epoch=epoch)

        # Gather all data concurrently
        sha, size, poh, txmeta, indices, indices_size, deals = await asyncio.gather(
            self.fetch_text(session, sha_url),
            self.get_size(session, car_url),
            self.fetch_text(session, poh_url),
            self.fetch_text(session, txmeta_url),
            self.get_indices(session, epoch),
            self.get_indices_size(session, epoch),
            self.get_deals(session, epoch),
        )

        return EpochData(
            epoch=epoch,
            car=car_url,
            sha=sha if sha else "n/a",
            sha_url=sha_url,
            size=size,
            poh=poh if poh else "n/a",
            poh_url=poh_url,
            txmeta=txmeta if txmeta else "n/a",
            txmeta_url=txmeta_url,
            deals=deals,
            indices=indices,
            indices_size=indices_size,
        )

    def format_row(self, data: EpochData) -> str:
        """Render *data* as one Markdown table row; unavailable cells show ✗."""
        car_cell = f"[epoch-{data.epoch}.car]({data.car})" if data.car != "n/a" else "✗"
        sha_cell = f"[{data.sha[:7]}]({data.sha_url})" if data.sha != "n/a" else "✗"
        size_cell = f"{data.size} GB" if data.size != "n/a" else "✗"
        txmeta_cell = f"[✓]({data.txmeta_url})" if validate_txmeta_output(data.txmeta) else "✗"
        poh_cell = f"[✓]({data.poh_url})" if validate_poh_output(data.poh) else "✗"
        indices_cell = "✓" if data.indices != "n/a" else "✗"
        indices_size_cell = f"{data.indices_size} GB" if data.indices_size != "n/a" else "✗"
        deals_cell = f"[✓]({data.deals})" if data.deals != "n/a" else "✗"

        return f"| {data.epoch} | {car_cell} | {sha_cell} | {size_cell} | {txmeta_cell} | {poh_cell} | {indices_cell} | {indices_size_cell} | {deals_cell} |"

    async def get_current_epoch(self) -> int:
        """Ask the Solana mainnet RPC endpoint for the current epoch number.

        Raises:
            aiohttp.ClientResponseError: the endpoint answered with an HTTP
                error status.
            RuntimeError: the JSON reply carries no ``result.epoch`` value.
        """
        async with aiohttp.ClientSession() as session:
            async with session.post(
                'https://api.mainnet-beta.solana.com',
                json={"jsonrpc":"2.0","id":1, "method":"getEpochInfo"}
            ) as response:
                # Fail with the HTTP status instead of a confusing parse error.
                response.raise_for_status()
                data = await response.json()

        try:
            return int(data['result']['epoch'])
        except (KeyError, TypeError, ValueError) as err:
            raise RuntimeError(f"Unexpected getEpochInfo response: {data!r}") from err

    async def run(self, chunk_size: int = 20):
        """Print the full report to stdout, newest epoch first.

        Args:
            chunk_size: number of epochs probed concurrently; rows of one
                chunk are printed before the next chunk starts.

        Raises:
            ValueError: *chunk_size* is smaller than 1.
        """
        if chunk_size < 1:
            raise ValueError("chunk_size must be at least 1")

        current_epoch = await self.get_current_epoch()
        epochs = range(current_epoch, -1, -1)  # descending order

        print("| Epoch # | CAR | CAR SHA256 | CAR filesize | tx meta check | poh check | Indices | Indices Size | Filecoin Deals |")
        print("|---|---|---|---|---|---|---|---|---|")

        async with aiohttp.ClientSession() as session:
            for start in range(0, len(epochs), chunk_size):
                chunk = epochs[start:start + chunk_size]
                results = await asyncio.gather(
                    *(self.get_epoch_data(session, epoch) for epoch in chunk)
                )

                # gather() keeps input order, so rows stay in descending order
                for result in results:
                    print(self.format_row(result))

def validate_txmeta_output(txmeta_text: str) -> bool:
    """
    Validates that txmeta check output shows zero missing and zero parsing errors

    Args:
        txmeta_text: text of the tx-metadata check log, or the "n/a" sentinel.

    Returns True only when both zero-count summary lines are present.
    Non-string input (e.g. None) and "n/a" are reported as False.
    """
    # Explicit type guard instead of a catch-all try/except around `in`.
    if not isinstance(txmeta_text, str) or txmeta_text == "n/a":
        return False

    required_markers = (
        'Transactions with missing metadata: 0',
        'Transactions with metadata parsing error: 0',
    )
    return all(marker in txmeta_text for marker in required_markers)

def validate_poh_output(poh_text: str) -> bool:
    """
    Validates the PoH check output

    Args:
        poh_text: text of the PoH check log, or the "n/a" sentinel.

    Returns True only when the log contains the success message.
    Non-string input (e.g. None) and "n/a" are reported as False.
    """
    # Explicit type guard instead of a bare `except:` around `in`.
    if not isinstance(poh_text, str) or poh_text == "n/a":
        return False

    return 'Successfully checked PoH on CAR file for epoch' in poh_text

def main():
    """Script entry point: build the report and print it to stdout."""
    asyncio.run(FaithfulDataReport().run())


if __name__ == "__main__":
    main()
41 changes: 41 additions & 0 deletions .github/workflows/data-report.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
concurrency:
group: ${{ github.workflow }}-${{ github.ref }}
cancel-in-progress: true

name: Data Report Generator
on:
push:
schedule:
- cron: '3 */4 * * *' # Run every 4 hours, at minute 3
workflow_dispatch: # Allow manual trigger

jobs:
generate-report:
runs-on: ubuntu-latest

permissions:
# Give the default GITHUB_TOKEN write permission to commit and push the
# added or changed files to the repository.
contents: write

steps:
- uses: actions/checkout@v4

- name: Generate Report
run: |
mkdir -p docs
python3 -m pip install aiohttp
python3 .github/faithful-data-report.py > docs/CAR-REPORT.md
# Commit changed files back to the repository
- uses: EndBug/add-and-commit@v9
with:
message: Old Faithful CAR data report update
new_branch: 'gha-report'
add: 'docs/*.md'
author_name: github-actions[bot]
author_email: 41898282+github-actions[bot]@users.noreply.github.com
fetch: origin gha-report
push: origin gha-report --force
pull: ' '

0 comments on commit 29af2e8

Please sign in to comment.