Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
35 changes: 34 additions & 1 deletion .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -114,4 +114,37 @@ jobs:
shell: bash
run: |
echo "Attempting to build docs..."
make build_docs
make build_docs
test_datasets:
  # CI job that runs scripts.check_datasets to validate the public datasets.
  timeout-minutes: 5
  runs-on: ubuntu-latest
  defaults:
    run:
      working-directory: ${{ env.WORKDIR }}
  strategy:
    matrix:
      python-version:
        - "3.11"
  name: Validate Public Datasets
  steps:
    - uses: actions/checkout@v3

    # Repository-local action that provisions Python and Poetry.
    - name: Set up Python ${{ matrix.python-version }} + Poetry ${{ env.POETRY_VERSION }}
      uses: "./.github/actions/poetry_setup"
      with:
        python-version: ${{ matrix.python-version }}
        poetry-version: ${{ env.POETRY_VERSION }}
        working-directory: .
        cache-key: benchmarks-all

    - name: Install dependencies
      shell: bash
      run: |
        echo "Running tests, installing dependencies with poetry..."
        poetry install --with test,lint,typing,docs

    # The step fails (and so does the job) when the script exits non-zero.
    - name: Request datasets
      shell: bash
      run: |
        echo "Checking that all registered public datasets exist..."
        poetry run python -m scripts.check_datasets
15 changes: 15 additions & 0 deletions langchain_benchmarks/utils/_langsmith.py
Original file line number Diff line number Diff line change
Expand Up @@ -120,3 +120,18 @@ def download_public_dataset(
print("Done fetching examples.")
finally:
del source_client


def exists_public_dataset(token_or_url: str, *, api_url: str = API_URL) -> bool:
    """Report whether a publicly shared dataset can be found.

    Args:
        token_or_url: Share token or URL identifying the dataset; it is
            resolved by ``_parse_token_or_url``.
        api_url: API endpoint passed to ``_parse_token_or_url`` together
            with ``token_or_url``.

    Returns:
        True when ``read_shared_dataset`` succeeds, False when it raises
        ``LangSmithNotFoundError``. Any other exception propagates.
    """
    resolved_url, dataset_uuid = _parse_token_or_url(token_or_url, api_url)
    # NOTE(review): a placeholder key is used, presumably because shared
    # datasets are readable without real credentials -- confirm.
    client = Client(api_url=resolved_url, api_key="placeholder")
    try:
        client.read_shared_dataset(dataset_uuid)
    except LangSmithNotFoundError:
        found = False
    else:
        found = True
    finally:
        # Drop the local reference to the client once the lookup is done.
        del client
    return found
22 changes: 22 additions & 0 deletions scripts/check_datasets.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
"""Script to check that all registered datasets can be downloaded."""
from langchain_benchmarks import registry
from langchain_benchmarks.utils._langsmith import exists_public_dataset


def check_datasets() -> bool:
    """Verify that the dataset of every registered task exists.

    Walks ``registry.tasks``, printing one progress line per task followed
    by either an OK or an ERROR line. All tasks are checked even after a
    failure so the output lists every missing dataset.

    Returns:
        True if every dataset was found, False if at least one was missing.
    """
    all_found = True
    for entry in registry.tasks:
        print(f"Checking {entry.name}...")
        if not exists_public_dataset(entry.dataset_id):
            all_found = False
            print(" ERROR: Dataset not found")
            continue
        print(" OK")
    return all_found


if __name__ == "__main__":
    # Exit with a non-zero status when any dataset is missing so that the
    # calling process (e.g. a CI step) is marked as failed.
    # SystemExit is raised directly rather than calling the ``exit`` builtin,
    # which is injected by the ``site`` module for interactive use and is not
    # guaranteed to exist (for example under ``python -S``).
    if not check_datasets():
        raise SystemExit(1)