Skip to content

Commit 1b49840

Browse files
authored
Improved download_attachments_from_page: added filename filter for si… (#1476)
* Improved download_attachments_from_page: added a filename filter for single-file download and an in-memory BytesIO option (e.g. for loading with pandas) in addition to writing to disk. * Hotfix: Changed save_path back to path for full backwards compatibility --------- Co-authored-by: Anders Imenes <anders.imenes@telenor.no>
1 parent 22c6988 commit 1b49840

File tree

1 file changed

+89
-34
lines changed

1 file changed

+89
-34
lines changed

atlassian/confluence.py

Lines changed: 89 additions & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -1,15 +1,25 @@
11
# coding=utf-8
2+
import io
3+
import json
24
import logging
35
import os
4-
import time
5-
import json
66
import re
7-
from requests import HTTPError
8-
import requests
9-
from deprecated import deprecated
7+
import time
8+
109
from bs4 import BeautifulSoup
10+
from deprecated import deprecated
11+
import requests
12+
from requests import HTTPError
13+
1114
from atlassian import utils
12-
from .errors import ApiError, ApiNotFoundError, ApiPermissionError, ApiValueError, ApiConflictError, ApiNotAcceptable
15+
from .errors import (
16+
ApiConflictError,
17+
ApiError,
18+
ApiNotAcceptable,
19+
ApiNotFoundError,
20+
ApiPermissionError,
21+
ApiValueError,
22+
)
1323
from .rest_client import AtlassianRestAPI
1424

1525
log = logging.getLogger(__name__)
@@ -1390,40 +1400,85 @@ def attach_file(
13901400
comment=comment,
13911401
)
13921402

1393-
def download_attachments_from_page(self, page_id, path=None, start=0, limit=50, filename=None, to_memory=False):
    """
    Download the attachments of a Confluence page, either to disk or into memory.

    :param page_id: ID of the page whose attachments are fetched.
    :param path: OPTIONAL: directory the attachments are written to. If None, the current
                 working directory is used. Ignored when ``to_memory`` is True.
    :param start: OPTIONAL: start point of the attachment collection. Default: 0.
                  Not used when ``filename`` is given.
    :param limit: OPTIONAL: maximum number of attachments requested. Default: 50.
                  Not used when ``filename`` is given.
    :param filename: OPTIONAL: if given, only attachments with this filename are requested.
    :param to_memory: OPTIONAL: if True, nothing is written to disk and the content of every
                      attachment is returned wrapped in an ``io.BytesIO`` object.
    :return:
        - a message string if the page has no (matching) attachments;
        - ``{file name: BytesIO}`` if ``to_memory`` is True;
        - ``{"attachments_downloaded": int, "path": str}`` otherwise.
    :raises FileNotFoundError: if the target directory does not exist or is not a directory.
    :raises PermissionError: if a file cannot be written to the target directory.

    Any error raised by ``response.raise_for_status()`` or by the attachment lookup is
    propagated unchanged, so callers keep the original exception type and traceback.
    """
    # Only the on-disk mode needs a target directory.
    if not to_memory and path is None:
        path = os.getcwd()

    if filename:
        attachments = self.get_attachments_from_content(page_id=page_id, filename=filename)["results"]
        if not attachments:
            return f"No attachment with filename '{filename}' found on the page."
    else:
        attachments = self.get_attachments_from_content(page_id=page_id, start=start, limit=limit)["results"]
        if not attachments:
            return "No attachments found on the page."

    downloaded_files = {}
    for attachment in attachments:
        # Fall back to the attachment ID when the attachment has no title.
        file_name = attachment["title"] or attachment["id"]
        download_link = self.url + attachment["_links"]["download"]

        response = self._session.get(download_link)
        response.raise_for_status()  # let download failures surface with their real type

        if to_memory:
            downloaded_files[file_name] = io.BytesIO(response.content)
            continue

        # The title comes from the server: keep only its last path component so that a
        # name such as "../x" cannot place the file outside of `path`.
        safe_name = os.path.basename(str(file_name)) or str(attachment["id"])
        file_path = os.path.join(path, safe_name)
        # The try block is limited to the disk write so that only genuine filesystem
        # problems are translated into the directory-related messages below.
        try:
            with open(file_path, "wb") as file:
                file.write(response.content)
        except (FileNotFoundError, NotADirectoryError) as err:
            # open() reports a missing directory as FileNotFoundError and a path
            # component that is a regular file as NotADirectoryError.
            raise FileNotFoundError(f"The directory '{path}' does not exist.") from err
        except PermissionError as err:
            raise PermissionError(f"Permission denied when trying to save files to '{path}'.") from err

    if to_memory:
        return downloaded_files
    return {"attachments_downloaded": len(attachments), "path": path}
14271482

14281483
def delete_attachment(self, page_id, filename, version=None):
14291484
"""

0 commit comments

Comments
 (0)