22
from typing import Optional

import pytest
import pytest_asyncio
from pydantic import BaseModel, Field

from stagehand import Stagehand, StagehandConfig
from stagehand.schemas import ExtractOptions
79
810
911skip_if_no_creds = pytest .mark .skipif (
1214)
1315
1416
class Article(BaseModel):
    """Schema for article extraction tests.

    Passed as ``schema_definition`` to ``ExtractOptions`` and used to
    validate whatever the extraction call returns.
    """

    # Required: an extraction without a title is considered invalid.
    title: str = Field(..., description="The title of the article")
    # Optional: declared as Optional[str] so the annotation agrees with the
    # ``None`` default and an explicit null in the payload still validates.
    summary: Optional[str] = Field(
        None, description="A brief summary or description of the article"
    )
21+
22+
1523@pytest_asyncio .fixture (scope = "module" )
1624@skip_if_no_creds
1725async def stagehand_api ():
@@ -35,4 +43,48 @@ async def stagehand_api():
@pytest.mark.asyncio
async def test_stagehand_api_initialization(stagehand_api):
    """Check that the Stagehand fixture exposes a session id after set-up."""
    session_id = stagehand_api.session_id
    assert session_id is not None
47+
48+
@skip_if_no_creds
@pytest.mark.integration
@pytest.mark.api
@pytest.mark.asyncio
async def test_api_extract_functionality(stagehand_api):
    """Test core extract functionality in API mode - extracted from e2e tests.

    Exercises two extraction styles against a live page: a plain-text
    instruction, then a schema-based request validated against ``Article``.

    Args:
        stagehand_api: The module-scoped Stagehand fixture.
    """
    stagehand = stagehand_api

    # Navigate to a content-rich page
    await stagehand.page.goto("https://news.ycombinator.com")

    # Test simple text-based extraction
    titles_text = await stagehand.page.extract(
        "Extract the titles of the first 3 articles on the page as a JSON array"
    )

    # Verify extraction worked
    assert titles_text is not None

    # Test schema-based extraction
    extract_options = ExtractOptions(
        instruction="Extract the first article's title and any available summary",
        schema_definition=Article,
    )

    article_data = await stagehand.page.extract(extract_options)
    assert article_data is not None

    # Validate the extracted data structure. Two result shapes are accepted;
    # anything else fails loudly instead of silently skipping validation.
    payload = getattr(article_data, "data", None)
    if payload:
        # BROWSERBASE mode format: fields are nested under a ``data`` attribute
        article = Article.model_validate(payload)
    elif hasattr(article_data, "title"):
        # Fallback format: fields live directly on the result object
        # (assumes it is a pydantic model exposing model_dump() - TODO confirm)
        article = Article.model_validate(article_data.model_dump())
    else:
        pytest.fail(
            "Unexpected extract result shape: "
            f"{type(article_data).__name__} has no usable 'data' or 'title'"
        )

    # A non-empty title is the minimum evidence of a successful extraction
    assert article.title

    # Verify API session is active
    assert stagehand.session_id is not None
0 commit comments