99
1010
1111@pytest .mark .only
12- async def test_actor_scrapy_title_spider (
12+ async def test_actor_scrapy_title_spider_v2 (
1313 make_actor : MakeActorFunction ,
1414 run_actor : RunActorFunction ,
1515) -> None :
1616 actor_source_files = {
17- 'requirements.txt' : """
18- scrapy ~= 2.12
19- """ ,
2017 'src/spiders/title.py' : """
2118 from __future__ import annotations
2219 from typing import TYPE_CHECKING, Any
@@ -32,6 +29,9 @@ async def test_actor_scrapy_title_spider(
3229 class TitleSpider(Spider):
3330 name = 'title_spider'
3431
32+ # Limit the number of pages to scrape.
33+ custom_settings = {'CLOSESPIDER_PAGECOUNT': 10}
34+
3535 def __init__(
3636 self,
3737 start_urls: list[str],
@@ -61,7 +61,7 @@ def parse(self, response: Response) -> Generator[TitleItem | Request, None, None
6161 import scrapy
6262
6363 class TitleItem(scrapy.Item):
64- url = scrapy.Field
64+ url = scrapy.Field()
6565 title = scrapy.Field()
6666 """ ,
6767 'src/settings.py' : """
@@ -107,11 +107,10 @@ async def main() -> None:
107107 """ ,
108108 'src/__main__.py' : """
109109 from __future__ import annotations
110- import asyncio
111110 from twisted.internet import asyncioreactor
112111
113112 # Install Twisted's asyncio reactor before importing any other Twisted or Scrapy components.
114- asyncioreactor.install(asyncio.get_event_loop() )
113+ asyncioreactor.install()
115114
116115 import os
117116 from apify.scrapy import initialize_logging, run_scrapy_actor
@@ -133,5 +132,8 @@ async def main() -> None:
133132
134133 items = await actor .last_run ().dataset ().list_items ()
135134
136- assert items .count == 48
137- assert items .items == {'blah' }
135+ assert items .count >= 10
136+
137+ for item in items .items :
138+ assert 'url' in item
139+ assert 'title' in item
0 commit comments