Skip to content

[Confluence] new method added confluence get_tables_from_page #1281

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Dec 12, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
41 changes: 41 additions & 0 deletions atlassian/confluence.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
from requests import HTTPError
import requests
from deprecated import deprecated
from bs4 import BeautifulSoup
from atlassian import utils
from .errors import ApiError, ApiNotFoundError, ApiPermissionError, ApiValueError, ApiConflictError, ApiNotAcceptable
from .rest_client import AtlassianRestAPI
Expand Down Expand Up @@ -356,6 +357,46 @@ def get_page_by_id(self, page_id, expand=None, status=None, version=None):

return response

def get_tables_from_page(self, page_id):
    """
    Fetch the HTML tables contained in a Confluence page.

    The page body is requested in its ``body.storage`` representation and parsed
    with BeautifulSoup (``lxml`` parser). Every ``<table>`` becomes a list of rows
    and every ``<tr>`` a list with the text of its ``<th>``/``<td>`` cells, in the
    order in which the cells appear in the document.

    :param page_id: integer confluence page_id
    :return: one of
             - a JSON string with the keys ``page_id``, ``number_of_tables_in_page``
               and ``tables_content`` (list of tables -> list of rows -> list of
               cell texts) when the page contains at least one table;
             - the dict ``{"No tables found for page: ": page_id}`` when the page
               has content but no table;
             - the set ``{"Page content is empty"}`` when the storage body is empty;
             - ``None`` when the lookup fails for any reason other than an
               HTTP 404 (the failure is logged).
    :raises ApiError: when Confluence answers the page lookup with HTTP 404
    """
    try:
        page_content = self.get_page_by_id(page_id, expand="body.storage")["body"]["storage"]["value"]

        if not page_content:
            # NOTE: this literal is a set, not a dict. It is kept unchanged so
            # existing callers comparing against it keep working.
            return {"Page content is empty"}

        soup = BeautifulSoup(page_content, features="lxml")
        tables_raw = [
            # Passing both tag names in one search keeps the cells in document
            # order; concatenating two separate searches would move every <th>
            # in front of every <td> of the same row.
            [[cell.text for cell in row(["th", "td"])] for row in table("tr")]
            for table in soup("table")
        ]

        if not tables_raw:
            return {
                "No tables found for page: ": page_id,
            }

        return json.dumps(
            {
                "page_id": page_id,
                "number_of_tables_in_page": len(tables_raw),
                "tables_content": tables_raw,
            }
        )
    except HTTPError as e:
        if e.response.status_code == 404:
            # Raise ApiError as the documented reason is ambiguous
            log.error("Couldn't retrieve tables from page %s", page_id)
            raise ApiError(
                "There is no content with the given pageid, pageid params is not an integer "
                "or the calling user does not have permission to view the page",
                reason=e,
            )
        # Any other HTTP failure used to vanish without a trace; record it so
        # the None result can be diagnosed.
        log.error("Couldn't retrieve tables from page %s: %s", page_id, e)
    except Exception as e:
        # Best-effort behaviour is preserved: parsing or lookup problems are
        # logged and reported to the caller as None.
        log.error("Error occurred while retrieving tables from page %s: %s", page_id, e)
    return None

def get_page_labels(self, page_id, prefix=None, start=None, limit=None):
"""
Returns the list of labels on a piece of Content.
Expand Down
3 changes: 3 additions & 0 deletions docs/confluence.rst
Original file line number Diff line number Diff line change
Expand Up @@ -152,6 +152,9 @@ Page actions
# Add comment into page
confluence.add_comment(page_id, text)

# Fetch tables from Confluence page
confluence.get_tables_from_page(page_id)

Template actions
----------------

Expand Down
17 changes: 17 additions & 0 deletions examples/confluence/confluence_get_tables_from_page.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
import logging

from atlassian import Confluence

# Configure logging first so that messages emitted while the client is created
# and used are shown.
logging.basicConfig(level=logging.INFO)

confluence = Confluence(
    url="<instance_url>",
    username="<user_email>",
    password="api_key",
)

# page_id is the id of the page you want to get the tables from.
page_id = 393464

result = confluence.get_tables_from_page(page_id)
print(result)
# Let's say the page has two tables, each one with 3 columns and 2 rows.
# The method should return the following output: {"page_id": 393464, "number_of_tables_in_page": 2, "tables_content": [[["header1", "header2", "header3"], ["h1r1", "h2r1", "h3r1"], ["h1r2", "h2r2", "h3r2"]], [["table2 header1", "table2 header2", "table2 header3"], ["h1r1", "h2r1", "h3r1"], ["h1r2", "h2r2", "h3r2"]]]}
# tables_content is a list of lists of lists. Each nested list represents a table. Each nested list inside a table represents a row.
2 changes: 2 additions & 0 deletions requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -4,3 +4,5 @@ six
oauthlib
requests_oauthlib
requests-kerberos==0.14.0
bs4
lxml
1 change: 1 addition & 0 deletions tox.ini
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ deps =
pytest-cov
coverage
requests
bs4
commands =
coverage erase
pytest -v --cov=atlassian --cov-branch --cov-report=xml
Expand Down