Skip to content

Commit 416a2b0

Browse files
authored
[Confluence] New method 'scrap_regex_from_page' + docs + example (#1327)
* fixing minor issue in scrap_regex_from_issue method * new Confluence method scrap_regex_from_page + docs + examples --------- Co-authored-by: gkowalc <>
1 parent 7c3dcee commit 416a2b0

File tree

4 files changed

+54
-11
lines changed

4 files changed

+54
-11
lines changed

atlassian/confluence.py

Lines changed: 27 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@
33
import os
44
import time
55
import json
6-
6+
import re
77
from requests import HTTPError
88
import requests
99
from deprecated import deprecated
@@ -397,6 +397,32 @@ def get_tables_from_page(self, page_id):
397397
except Exception as e:
398398
log.error("Error occured", e)
399399

400+
def scrap_regex_from_page(self, page_id, regex):
    """
    Scrape all matches of a regex pattern from the storage-format body of a Confluence page.

    :param page_id: The ID of the Confluence page.
    :param regex: The regex pattern to scrape.
    :return: A list of regex matches (empty list if the body is empty or nothing matches).
    :raises ApiNotFoundError: if the page does not exist or the calling user
        may not view it (translated from a 404 HTTPError).
    """
    regex_output = []
    try:
        # The REST call must live INSIDE the try block: it is the only
        # statement here that can raise HTTPError, so placing it outside
        # (as before) made the except branch unreachable dead code.
        page_output = self.get_page_by_id(page_id, expand="body.storage")["body"]["storage"]["value"]
        if page_output is not None:
            regex_output.extend(x.group(0) for x in re.finditer(regex, page_output))
        return regex_output
    except HTTPError as e:
        if e.response.status_code == 404:
            # Raise ApiError as the documented reason is ambiguous.
            # Use lazy %-formatting so the page id actually appears in the log
            # (the old call passed it as an unused format argument).
            log.error("couldn't find page_id : %s", page_id)
            raise ApiNotFoundError(
                "There is no content with the given page id,"
                "or the calling user does not have permission to view the page",
                reason=e,
            )
        # Propagate unexpected HTTP errors instead of silently returning None.
        raise
425+
400426
def get_page_labels(self, page_id, prefix=None, start=None, limit=None):
401427
"""
402428
Returns the list of labels on a piece of Content.

atlassian/jira.py

Lines changed: 10 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -1553,15 +1553,16 @@ def scrap_regex_from_issue(self, issue, regex):
15531553
comments = issue_output["fields"]["comment"]["comments"]
15541554

15551555
try:
1556-
description_matches = [x.group(0) for x in re.finditer(regex, description)]
1557-
if description_matches:
1558-
regex_output.extend(description_matches)
1559-
1560-
for comment in comments:
1561-
comment_html = comment["body"]
1562-
comment_matches = [x.group(0) for x in re.finditer(regex, comment_html)]
1563-
if comment_matches:
1564-
regex_output.extend(comment_matches)
1556+
if description is not None:
1557+
description_matches = [x.group(0) for x in re.finditer(regex, description)]
1558+
if description_matches:
1559+
regex_output.extend(description_matches)
1560+
1561+
for comment in comments:
1562+
comment_html = comment["body"]
1563+
comment_matches = [x.group(0) for x in re.finditer(regex, comment_html)]
1564+
if comment_matches:
1565+
regex_output.extend(comment_matches)
15651566

15661567
return regex_output
15671568
except HTTPError as e:

docs/confluence.rst

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -156,7 +156,10 @@ Page actions
156156
confluence.add_comment(page_id, text)
157157
158158
# Fetch tables from Confluence page
159-
confluence.get_page_tables(page_id)
159+
confluence.get_tables_from_page(page_id)
160+
161+
# Get regex matches from Confluence page
162+
confluence.scrap_regex_from_page(page_id, regex)
160163
161164
Template actions
162165
----------------
Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,13 @@
1+
from atlassian import Confluence
2+
3+
4+
confluence = Confluence(
5+
url="<instance_url>",
6+
username="<user_enamil>",
7+
password="api_key",
8+
)
9+
page_id = 393464
10+
ipv4_regex = r"(?:(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\.){3}(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)"
11+
confluence.scrap_regex_from_page(
12+
page_id, ipv4_regex
13+
) # method returns list of matches of ipv4 addresses from page content.

0 commit comments

Comments
 (0)