22
33from typing import TYPE_CHECKING
44
5- import pytest
6-
75if TYPE_CHECKING :
86 from .conftest import MakeActorFunction , RunActorFunction
97
108
11- @pytest .mark .only
129async def test_actor_scrapy_title_spider (
1310 make_actor : MakeActorFunction ,
1411 run_actor : RunActorFunction ,
1512) -> None :
1613 actor_source_files = {
17- 'requirements.txt' : """
18- scrapy ~= 2.12
19- """ ,
2014 'src/spiders/title.py' : """
2115 from __future__ import annotations
2216 from typing import TYPE_CHECKING, Any
@@ -32,6 +26,9 @@ async def test_actor_scrapy_title_spider(
3226 class TitleSpider(Spider):
3327 name = 'title_spider'
3428
29+ # Limit the number of pages to scrape.
30+ custom_settings = {'CLOSESPIDER_PAGECOUNT': 10}
31+
3532 def __init__(
3633 self,
3734 start_urls: list[str],
@@ -61,7 +58,7 @@ def parse(self, response: Response) -> Generator[TitleItem | Request, None, None
6158 import scrapy
6259
6360 class TitleItem(scrapy.Item):
64- url = scrapy.Field
61+ url = scrapy.Field()
6562 title = scrapy.Field()
6663 """ ,
6764 'src/settings.py' : """
@@ -107,11 +104,10 @@ async def main() -> None:
107104 """ ,
108105 'src/__main__.py' : """
109106 from __future__ import annotations
110- import asyncio
111107 from twisted.internet import asyncioreactor
112108
113109 # Install Twisted's asyncio reactor before importing any other Twisted or Scrapy components.
114- asyncioreactor.install(asyncio.get_event_loop() )
110+ asyncioreactor.install()
115111
116112 import os
117113 from apify.scrapy import initialize_logging, run_scrapy_actor
@@ -133,5 +129,8 @@ async def main() -> None:
133129
134130 items = await actor .last_run ().dataset ().list_items ()
135131
136- assert items .count == 48
137- assert items .items == {'blah' }
132+ assert items .count >= 10
133+
134+ for item in items .items :
135+ assert 'url' in item
136+ assert 'title' in item
0 commit comments