Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
25 changes: 7 additions & 18 deletions md2cf/__main__.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -319,28 +319,17 @@ def main():

pages_to_upload = collect_pages_to_upload(args)

# DEBUG: Print all collected pages
console.log("\n[bold cyan]DEBUG: Collected pages:[/]", markup=True)
for idx, page in enumerate(pages_to_upload, 1):
has_content = "✓" if page.body else "✗"
file_info = f" from {page.file_path}" if page.file_path else " (folder page)"
console.log(f" {idx}. [{has_content}] {page.title}{file_info}")

page_title_counts = Counter([page.title for page in pages_to_upload])
colliding_titles = [
title for title, count in page_title_counts.most_common() if count > 1
]
if colliding_titles:
error_console.log(
":x: Some documents have the [bold]same title[/], but all Confluence pages "
"in the same space must have different titles.\n\n"
"These are the documents (and path, if available) with identical titles:",
markup=True,
)
colliding_titles_table = rich.table.Table(box=box.SIMPLE)
colliding_titles_table.add_column("Title")
colliding_titles_table.add_column("File")
for title in colliding_titles:
for filename in [
page.file_path for page in pages_to_upload if page.title == title
]:
# error_console.log(f"{title}\t{filename}\n", markup=True)
colliding_titles_table.add_row(title, str(filename))
error_console.log(colliding_titles_table)
sys.exit(1)

for page in pages_to_upload:
for attachment in page.attachments:
Expand Down
77 changes: 74 additions & 3 deletions md2cf/document.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -113,7 +113,28 @@ def get_pages_from_directory(
base_path = file_path.resolve()
folder_data = dict()
git_repo = GitRepository(file_path, use_gitignore=use_gitignore)


# Track which markdown files are used as folder content (to skip them later)
files_used_as_folder_content = set()

# First pass: identify all markdown files that will be used as folder content
for current_path, directories, file_names in os.walk(file_path):
current_path = Path(current_path).resolve()
if git_repo.is_ignored(current_path):
continue

# For each subdirectory, check if there's a matching markdown file
for subdir in directories:
subdir_path = current_path / subdir
if git_repo.is_ignored(subdir_path):
continue

# Look for a markdown file with the same stem as the subdirectory
potential_file = current_path / f"{subdir}.md"
if potential_file.exists() and not git_repo.is_ignored(potential_file):
files_used_as_folder_content.add(potential_file.resolve())

# Second pass: process all files and folders
for current_path, directories, file_names in os.walk(file_path):
current_path = Path(current_path).resolve()

Expand All @@ -130,7 +151,22 @@ def get_pages_from_directory(
path for path in markdown_files if not git_repo.is_ignored(path)
]

folder_data[current_path] = {"n_files": len(markdown_files)}
# Build a set of subdirectory names in the current path
subdirs_in_current = {d for d in directories}

# Check if there's a markdown file at the parent level with matching stem
folder_content_file = None
if current_path != base_path:
# Check if a markdown file with the same stem exists in the parent directory
potential_file = current_path.parent / f"{current_path.name}.md"
if potential_file.resolve() in files_used_as_folder_content:
folder_content_file = potential_file

folder_data[current_path] = {
"n_files": len(markdown_files),
"content_file": folder_content_file,
"subdirs": subdirs_in_current
}

# we'll capture title and path of the parent folder for this folder:
folder_parent_title = None
Expand Down Expand Up @@ -176,18 +212,53 @@ def get_pages_from_directory(

folder_data[current_path]["title"] = folder_title

# Prepare folder page with content if a matching markdown file exists
folder_page_body = ""
folder_page_file_path = None
folder_page_attachments = []
folder_page_relative_links = []

if folder_data[current_path]["content_file"]:
# Use the content from the matching markdown file
content_page = get_page_data_from_file_path(
folder_data[current_path]["content_file"],
strip_header=strip_header,
remove_text_newlines=remove_text_newlines,
enable_relative_links=enable_relative_links,
)
folder_page_body = content_page.body
folder_page_file_path = content_page.file_path
folder_page_attachments = content_page.attachments
folder_page_relative_links = content_page.relative_links
# Override folder title with the document title if available
if content_page.title:
folder_title = content_page.title
parent_page_title = content_page.title # Update parent_page_title for children
folder_data[current_path]["title"] = folder_title

if folder_title is not None and (
markdown_files or (directories and not skip_empty and not collapse_empty)
):
processed_pages.append(
Page(
title=folder_title,
parent_title=folder_parent_title,
body="",
body=folder_page_body,
file_path=folder_page_file_path,
attachments=folder_page_attachments,
relative_links=folder_page_relative_links,
)
)

for markdown_file in markdown_files:
# Skip this file if it was already used as the folder's content
if folder_data[current_path]["content_file"] == markdown_file:
continue

# Skip this file if it's being used as content for any folder
if markdown_file.resolve() in files_used_as_folder_content:
continue

processed_page = get_page_data_from_file_path(
markdown_file,
strip_header=strip_header,
Expand Down