Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions .github/CODEOWNERS
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
# Default owners for everything in the repo
* @rewindio/devops
5 changes: 4 additions & 1 deletion CLAUDE.md
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,7 @@
│ ├── test_attachments.py # AttachmentGenerator tests (53 tests)
│ ├── test_pages.py # PageGenerator tests (52 tests)
│ ├── test_spaces.py # SpaceGenerator tests (55 tests)
│ ├── test_cleanup.py # Cleanup command tests (21 tests)
│ └── test_user_generator.py # User generator tests (51 tests)
├── .github/workflows/
│ ├── test.yml # Tests with 90% coverage threshold
Expand Down Expand Up @@ -509,12 +510,14 @@ When possible, run a quick manual test against a real Confluence instance after
| `--spaces` | No | Override number of spaces | calculated |
| `--concurrency` | No | Max concurrent requests | `5` |
| `--request-delay` | No | Delay between API calls in sync loops (seconds) | `0.0` |
| `--settling-delay` | No | Delay before version creation to let Confluence settle | `1.0` |
| `--settling-delay` | No | Delay before version creation to let Confluence settle | `0.0` |
| `--content-only` | No | Only create spaces, pages, blogposts | `false` |
| `--dry-run` | No | Preview without API calls | `false` |
| `--resume` | No | Resume from checkpoint | `false` |
| `--no-checkpoint` | No | Disable checkpointing | `false` |
| `--no-async` | No | Use synchronous mode | `false` |
| `--cleanup` | No | Delete all test spaces matching the prefix instead of generating data | `false` |
| `--yes` | No | Skip confirmation prompt during cleanup | `false` |
| `--verbose` | No | Enable debug logging | `false` |

**Note**: API token is read from `CONFLUENCE_API_TOKEN` environment variable or `.env` file. Never pass tokens via command line.
Expand Down
31 changes: 25 additions & 6 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -208,12 +208,14 @@ python confluence_data_generator.py \
| `--spaces` | Override number of spaces (otherwise calculated from multipliers) | auto |
| `--concurrency` | Max concurrent requests | 5 |
| `--request-delay` | Delay between API calls in sync loops (seconds). Useful for throttling on rate-limited instances. | 0.0 |
| `--settling-delay` | Delay before version creation to let Confluence's eventual consistency settle (seconds). Set to 0 if the built-in retry-on-409 logic is sufficient. | 1.0 |
| `--settling-delay` | Delay before version creation to let Confluence's eventual consistency settle (seconds). Defaults to 0 since retry-on-409 logic handles this automatically; increase if you see excessive 409 retries. | 0.0 |
| `--content-only` | Only create spaces, pages, blogposts | false |
| `--dry-run` | Preview without making API calls | false |
| `--resume` | Resume from checkpoint | false |
| `--no-checkpoint` | Disable checkpointing | false |
| `--no-async` | Use synchronous mode | false |
| `--cleanup` | Delete all test spaces matching the prefix instead of generating data | false |
| `--yes` | Skip confirmation prompt during cleanup | false |
| `--verbose` | Enable debug logging | false |

## Size Buckets
Expand Down Expand Up @@ -241,15 +243,32 @@ For example, with `--count 1000 --size small`:

## Cleanup

All generated content is tagged with the prefix label (default: `TESTDATA`), making it easy to identify. To remove generated data, delete the spaces created during the run:
Use `--cleanup` to find and delete all test spaces matching your prefix:

```bash
# Delete a generated space by key (permanent — does not go to trash)
curl -s -u "your.email@company.com:$CONFLUENCE_API_TOKEN" \
-X DELETE "https://yourcompany.atlassian.net/wiki/rest/api/space/TESTDATA1"
# Show what would be deleted (dry run)
python confluence_data_generator.py \
--url https://yourcompany.atlassian.net/wiki \
--email your.email@company.com \
--cleanup --dry-run \
--prefix TESTDATA

# Delete with confirmation prompt
python confluence_data_generator.py \
--url https://yourcompany.atlassian.net/wiki \
--email your.email@company.com \
--cleanup \
--prefix TESTDATA

# Delete without confirmation (useful for CI/scripts)
python confluence_data_generator.py \
--url https://yourcompany.atlassian.net/wiki \
--email your.email@company.com \
--cleanup --yes \
--prefix TESTDATA
```

The API returns 202 (accepted) and the space — along with all its pages, blogposts, attachments, and comments — is permanently removed.
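Cleanup discovers spaces whose key starts with the first 6 characters of your prefix, uppercased (e.g., `TESTDA1`, `TESTDA2` for prefix `TESTDATA`), then deletes each via the REST API. Matching is by key prefix only, so run with `--cleanup --dry-run` first to confirm that no unrelated spaces are listed. Deletion is permanent — spaces do not go to the trash when deleted via API.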

## Development

Expand Down
161 changes: 158 additions & 3 deletions confluence_data_generator.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,9 +14,11 @@
import math
import os
import sys
import urllib.parse
from datetime import datetime
from pathlib import Path

import requests
from dotenv import load_dotenv

from generators.attachments import AttachmentGenerator
Expand Down Expand Up @@ -112,7 +114,7 @@ def __init__(
dry_run: bool = False,
concurrency: int = 5,
request_delay: float = 0.0,
settling_delay: float = 1.0,
settling_delay: float = 0.0,
content_only: bool = False,
checkpoint_manager: CheckpointManager | None = None,
):
Expand Down Expand Up @@ -1689,6 +1691,118 @@ async def _create_footer_comment_versions_async(self, comments: list[dict], coun
self.logger.info(f"Created {created} footer comment versions")


def cleanup_spaces(
    confluence_url: str,
    email: str,
    api_token: str,
    prefix: str,
    skip_confirm: bool = False,
    dry_run: bool = False,
) -> int:
    """Find and delete all test spaces matching the prefix.

    Spaces are listed through the paginated ``/api/v2/spaces`` endpoint and
    selected by key: a space matches when its key starts with the first six
    characters of ``prefix``, upper-cased. Each match is then removed with a
    ``DELETE /rest/api/space/{key}`` request.

    Args:
        confluence_url: Confluence Cloud URL (a trailing slash is ignored)
        email: Atlassian account email, used for basic auth
        api_token: Confluence API token, used for basic auth
        prefix: Prefix used when generating data; must not be blank
        skip_confirm: If True, skip confirmation prompt
        dry_run: If True, show what would be deleted without deleting

    Returns:
        Number of spaces deleted. ``0`` is returned when the prefix is blank,
        listing fails, nothing matches, ``dry_run`` is set, or the user does
        not answer ``y`` at the prompt.
    """
    logger = logging.getLogger(__name__)
    base_url = confluence_url.rstrip("/")

    # A blank prefix would match every space key, so refuse before any request.
    normalized_prefix = prefix.strip()
    if not normalized_prefix:
        logger.error("Cleanup prefix must be non-empty; refusing to list/delete spaces without a safe prefix.")
        return 0
    key_prefix = normalized_prefix[:6].upper()

    with requests.Session() as session:
        session.auth = (email, api_token)
        session.headers.update({"Accept": "application/json"})

        # Discover spaces via v2 API with pagination
        matching_spaces = []
        cursor = None

        while True:
            query = {"limit": 250}
            if cursor:
                query["cursor"] = cursor
            # urlencode re-escapes the cursor: parse_qs below returns it
            # percent-decoded, and sending it raw would corrupt characters
            # such as '+', '&' or '%'.
            url = f"{base_url}/api/v2/spaces?{urllib.parse.urlencode(query)}"

            try:
                resp = session.get(url, timeout=30)
            except requests.RequestException as e:
                logger.error(f"Failed to list spaces: {e}")
                return 0

            if resp.status_code != 200:
                logger.error(f"Failed to list spaces: {resp.status_code} {resp.text[:200]}")
                return 0

            try:
                data = resp.json()
            except ValueError as e:
                # A 200 with a non-JSON body (e.g. a proxy or login page) is a
                # listing failure, not a reason to crash with a traceback.
                logger.error(f"Failed to list spaces: invalid JSON response: {e}")
                return 0

            # `or ""` tolerates entries whose key is missing or null.
            matching_spaces.extend(
                space for space in data.get("results", []) if (space.get("key") or "").startswith(key_prefix)
            )

            # Check for next page
            next_link = data.get("_links", {}).get("next")
            if not next_link:
                break
            # Extract cursor from next link
            next_query = urllib.parse.urlparse(next_link).query
            cursor = urllib.parse.parse_qs(next_query).get("cursor", [None])[0]
            if not cursor:
                break

        if not matching_spaces:
            logger.info(f"No spaces found matching prefix '{key_prefix}'")
            return 0

        # Display matching spaces
        logger.info(f"\nFound {len(matching_spaces)} space(s) matching prefix '{key_prefix}':")
        for space in matching_spaces:
            logger.info(f"  {space['key']} - {space.get('name', '(unnamed)')}")

        if dry_run:
            logger.info(f"\n[DRY RUN] Would delete {len(matching_spaces)} space(s)")
            return 0

        # Confirm deletion
        if not skip_confirm:
            try:
                answer = input(f"\nDelete {len(matching_spaces)} space(s)? This is permanent. [y/N] ")
            except EOFError:
                # No stdin (e.g. a non-interactive run): treat as "no".
                answer = ""
            if answer.strip().lower() != "y":
                logger.info("Cleanup cancelled")
                return 0

        # Delete spaces via v1 API
        deleted = 0
        for space in matching_spaces:
            key = space["key"]
            encoded_key = urllib.parse.quote(key, safe="")
            try:
                resp = session.delete(f"{base_url}/rest/api/space/{encoded_key}", timeout=30)
            except requests.RequestException as e:
                # One failed delete must not stop the remaining spaces.
                logger.error(f"  Failed to delete space {key}: {e}")
                continue
            if resp.status_code == 202:
                logger.info(f"  Deleted space {key}")
                deleted += 1
            else:
                logger.error(f"  Failed to delete space {key}: {resp.status_code} {resp.text[:200]}")

        logger.info(f"\nDeleted {deleted}/{len(matching_spaces)} space(s)")
        return deleted


def setup_logging(prefix: str, verbose: bool = False) -> str:
"""Setup logging to console and file.

Expand Down Expand Up @@ -1774,6 +1888,18 @@ def main():
--resume \\
--prefix LOAD

# Delete all test spaces matching a prefix
%(prog)s --url https://mycompany.atlassian.net/wiki \\
--email user@example.com \\
--cleanup \\
--prefix TESTDATA

# Cleanup without confirmation prompt
%(prog)s --url https://mycompany.atlassian.net/wiki \\
--email user@example.com \\
--cleanup --yes \\
--prefix TESTDATA

Checkpointing:
- Progress is automatically saved to confluence_checkpoint_{PREFIX}.json
- Use --resume to continue from where you left off
Expand All @@ -1791,8 +1917,8 @@ def main():
parser.add_argument(
"--count",
type=int,
required=True,
help="Target number of content items (pages + blogposts)",
default=None,
help="Target number of content items (pages + blogposts). Required unless --cleanup is used.",
)
parser.add_argument(
"--prefix",
Expand Down Expand Up @@ -1864,6 +1990,18 @@ def main():
help="Disable checkpointing (not recommended for large runs)",
)

# Cleanup options
parser.add_argument(
"--cleanup",
action="store_true",
help="Delete all test spaces matching the prefix instead of generating data",
)
parser.add_argument(
"--yes",
action="store_true",
help="Skip confirmation prompt during cleanup",
)

args = parser.parse_args()

# Setup logging
Expand All @@ -1882,6 +2020,23 @@ def main():
)
sys.exit(1)

# Handle cleanup mode
if args.cleanup:
cleanup_spaces(
confluence_url=args.url,
email=args.email,
api_token=api_token,
prefix=args.prefix,
skip_confirm=args.yes,
dry_run=args.dry_run,
)
return

# Validate --count is provided for generation mode
if args.count is None:
print("Error: --count is required unless --cleanup is used.", file=sys.stderr)
sys.exit(1)

# Validate size bucket
if args.size not in MULTIPLIERS:
print(f"Error: Invalid size bucket. Must be one of: {', '.join(MULTIPLIERS.keys())}", file=sys.stderr)
Expand Down
Loading