[Bug]: Failed to handle download: 'AsyncPlaywrightCrawlerStrategy' object has no attribute 'downloads_path' #585
Open
Description
crawl4ai version
0.4.247
Expected Behavior
1. The file is downloaded into downloads_path.
2. The path of the downloaded file is listed in downloaded_files of the crawler.arun result.
Current Behavior
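1. The file is successfully downloaded into downloads_path.
2. downloaded_files in the crawler.arun result is None, and the log contains the error "Failed to handle download: 'AsyncPlaywrightCrawlerStrategy' object has no attribute 'downloads_path'".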
Is this reproducible?
Yes
Inputs Causing the Bug
Steps to Reproduce
Code snippets
from crawl4ai import AsyncWebCrawler
from crawl4ai.async_configs import BrowserConfig, CrawlerRunConfig
import asyncio
import os
import json
from crawl4ai import CacheMode
# Runs through without crashing, but no download is reported
async def download_citation():
    """Reproduce the missing ``downloaded_files`` entry for a citation download.

    Creates ``./test`` as the download directory, crawls the Nature article
    with a script that clicks its "Download citation" link, writes a
    JSON-friendly snapshot of selected result attributes to
    ``citation_result.json`` and prints ``result.downloaded_files``.
    """
    # Browser configuration: accept downloads and store them in target_dir.
    target_dir = "./test"
    os.makedirs(target_dir, exist_ok=True)
    browser_config = BrowserConfig(downloads_path=target_dir, accept_downloads=True)

    # The lines of this literal stay flush-left so the script text handed to
    # the crawler is not altered by Python indentation.
    click_citation_js = """
(function() {
const xpath = "//a[@data-test='citation-link' and contains(text(), 'Download citation')]";
const result = document.evaluate(xpath, document, null, XPathResult.FIRST_ORDERED_NODE_TYPE, null);
const element = result.singleNodeValue;
if (element) {
console.log('Found citation button, clicking...');
element.click();
return true;
} else {
console.log('Citation button not found');
return false;
}
})();
"""

    # Result attributes worth keeping in the JSON snapshot, in output order.
    wanted_attributes = (
        "status_code",
        "status",
        "json",
        "url",
        "links",
        "markdown",
        "markdown_v2",
        "metadata",
        "success",
        "error_message",
        "downloaded_files",
        "html",
        "cleaned_html",
        "response_headers",
    )
    # Values of these types (or None) are stored as-is; anything else is
    # stringified before serialization.
    plain_types = (str, int, float, bool, list, dict)

    user_agent_options = {"device_type": "mobile", "os_type": "android"}
    crawler_context = AsyncWebCrawler(
        config=browser_config,
        user_agent_mode="random",
        user_agent_generator_config=user_agent_options,
        verbose=True,
    )
    async with crawler_context as crawler:
        run_config = CrawlerRunConfig(
            js_code=click_citation_js,
            delay_before_return_html=1,
            cache_mode=CacheMode.BYPASS,
        )
        result = await crawler.arun(
            url="https://www.nature.com/articles/s41467-024-55621-z",
            config=run_config,
            magic=True,
            remove_overlay_elements=True,
        )

        # Only save the basic attributes that the result actually exposes.
        snapshot = {}
        for name in wanted_attributes:
            if not hasattr(result, name):
                continue
            value = getattr(result, name)
            is_plain = value is None or isinstance(value, plain_types)
            snapshot[name] = value if is_plain else str(value)

        # Write the result.
        with open("citation_result.json", "w", encoding="utf-8") as out_file:
            json.dump(snapshot, out_file, ensure_ascii=False, indent=2)
        print("\nResult dictionary has been saved to citation_result.json")

        # Falls back to None when the result has no such attribute.
        print("Downloaded files:", getattr(result, "downloaded_files", None))
# Run the reproduction on a fresh event loop only when executed as a script.
if __name__ == "__main__":
    asyncio.run(download_citation())
OS
macOS
Python version
3.12.6
Browser
Chrome
Browser version
No response