Skip to content
Draft
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
219 changes: 199 additions & 20 deletions autogpt_platform/backend/backend/blocks/github/pull_requests.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
import re
from typing import Literal
from typing import Literal, Optional

from typing_extensions import TypedDict

Expand Down Expand Up @@ -31,17 +31,77 @@ class Input(BlockSchemaInput):
description="URL of the GitHub repository",
placeholder="https://github.com/owner/repo",
)
state: Literal["open", "closed", "all"] = SchemaField(
description="Filter pull requests by state",
default="open",
)
base: str = SchemaField(
description=(
"Filter pull requests by base branch name. "
"Leave empty to return PRs targeting any branch."
),
default="",
advanced=True,
)
sort: Literal["created", "updated", "popularity", "long-running"] = SchemaField(
description=(
"Sort pull requests by: created date, last updated date, "
"comment count (popularity), or age (long-running)."
),
default="created",
advanced=True,
)
direction: Literal["asc", "desc"] = SchemaField(
description="Sort direction: descending (newest first) or ascending (oldest first).",
default="desc",
advanced=True,
)
per_page: int = SchemaField(
description=(
"Number of pull requests to return per page (max 100). "
"Set to 0 to auto-paginate and return ALL pull requests."
),
default=30,
advanced=True,
)
since: str = SchemaField(
description=(
"Only return pull requests created/updated at or after this time. "
"ISO 8601 format: YYYY-MM-DDTHH:MM:SSZ. Leave empty for no filter."
),
default="",
advanced=True,
)
Comment on lines +67 to +74
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue | 🟡 Minor | ⚡ Quick win

Documentation/implementation mismatch: since filter only checks created_at

The description at line 41 states "created/updated at or after" but the implementation at lines 164-168 only filters based on created_at. PRs updated after since but created before it will be excluded.

Either update the description to match the behavior, or extend the filter to also check updated_at.

Option A: Fix the description to match implementation
         since: str = SchemaField(
             description=(
-                "Only return pull requests created/updated at or after this time. "
+                "Only return pull requests created at or after this time. "
                 "ISO 8601 format: YYYY-MM-DDTHH:MM:SSZ. Leave empty for no filter."
             ),
             default="",
             advanced=True,
         )
Option B: Extend filter to also check `updated_at`
         for pr in data:
             created_at = pr.get("created_at", "")
+            updated_at = pr.get("updated_at", "")
             if since_dt and created_at:
                 pr_dt = datetime.fromisoformat(created_at.replace("Z", "+00:00"))
-                if pr_dt < since_dt:
+                updated_dt = (
+                    datetime.fromisoformat(updated_at.replace("Z", "+00:00"))
+                    if updated_at
+                    else pr_dt
+                )
+                if pr_dt < since_dt and updated_dt < since_dt:
                     continue

Also applies to: 163-168

🤖 Prompt for AI Agents
Verify each finding against current code. Fix only still-valid issues, skip the
rest with a brief reason, keep changes minimal, and validate.

In `@autogpt_platform/backend/backend/blocks/github/pull_requests.py` around lines
39 - 46, The docs say the SchemaField since should filter PRs "created/updated
at or after" but the current filter only checks created_at; modify the filter
logic that currently compares created_at (the block around the code checking
created_at >= since) to instead include PRs where either created_at >= since OR
updated_at >= since (parse/compare both ISO timestamps robustly and handle
empty/default since value), or alternatively change the since SchemaField
description to only mention creation; prefer updating the filter so both
created_at and updated_at are considered (refer to the since SchemaField and the
function/block that currently filters on created_at).

head: str = SchemaField(
description=(
"Filter by head branch. Use 'user:branch-name' format for forks "
"(e.g. 'octocat:feature-branch'). Leave empty for no filter."
),
default="",
advanced=True,
)

class Output(BlockSchemaOutput):
class PRItem(TypedDict):
title: str
url: str
number: int
state: str
author: str
created_at: str
updated_at: str
merged_at: Optional[str]
base_branch: str
head_branch: str
labels: list[str]
draft: bool

pull_request: PRItem = SchemaField(
title="Pull Request", description="PRs with their title and URL"
title="Pull Request",
description="PRs with their title, URL, number, state, author, timestamps, branches, labels, and draft status",
)
pull_requests: list[PRItem] = SchemaField(
description="List of pull requests with their title and URL"
description="List of pull requests with enriched metadata",
)
error: str = SchemaField(
description="Error message if listing pull requests failed"
Expand All @@ -50,13 +110,25 @@ class PRItem(TypedDict):
def __init__(self):
super().__init__(
id="ffef3c4c-6cd0-48dd-817d-459f975219f4",
description="This block lists all pull requests for a specified GitHub repository.",
description=(
"This block lists pull requests for a specified GitHub repository "
"with enriched metadata including timestamps, branches, labels, and draft status. "
"Supports filtering by state, base branch, head branch, and date; "
"sorting by created, updated, popularity, or age; and optional auto-pagination."
),
categories={BlockCategory.DEVELOPER_TOOLS},
input_schema=GithubListPullRequestsBlock.Input,
output_schema=GithubListPullRequestsBlock.Output,
test_input={
"repo_url": "https://github.com/owner/repo",
"credentials": TEST_CREDENTIALS_INPUT,
"state": "open",
"base": "",
"sort": "created",
"direction": "desc",
"per_page": 30,
"since": "",
"head": "",
},
test_credentials=TEST_CREDENTIALS,
test_output=[
Expand All @@ -66,6 +138,16 @@ def __init__(self):
{
"title": "Pull request 1",
"url": "https://github.com/owner/repo/pull/1",
"number": 1,
"state": "open",
"author": "username",
"created_at": "2024-01-01T00:00:00Z",
"updated_at": "2024-01-02T00:00:00Z",
"merged_at": None,
"base_branch": "master",
"head_branch": "feature-branch",
"labels": [],
"draft": False,
}
],
),
Expand All @@ -74,6 +156,16 @@ def __init__(self):
{
"title": "Pull request 1",
"url": "https://github.com/owner/repo/pull/1",
"number": 1,
"state": "open",
"author": "username",
"created_at": "2024-01-01T00:00:00Z",
"updated_at": "2024-01-02T00:00:00Z",
"merged_at": None,
"base_branch": "master",
"head_branch": "feature-branch",
"labels": [],
"draft": False,
},
),
],
Expand All @@ -82,39 +174,125 @@ def __init__(self):
{
"title": "Pull request 1",
"url": "https://github.com/owner/repo/pull/1",
"number": 1,
"state": "open",
"author": "username",
"created_at": "2024-01-01T00:00:00Z",
"updated_at": "2024-01-02T00:00:00Z",
"merged_at": None,
"base_branch": "master",
"head_branch": "feature-branch",
"labels": [],
"draft": False,
}
]
},
)

@staticmethod
async def list_prs(
    credentials: GithubCredentials,
    repo_url: str,
    state: str = "open",
    base: str = "",
    sort: str = "created",
    direction: str = "desc",
    per_page: int = 30,
    since: str = "",
    head: str = "",
) -> list["GithubListPullRequestsBlock.Output.PRItem"]:
    """Fetch pull requests for a repository and convert them to ``PRItem``s.

    Args:
        credentials: GitHub credentials passed to ``get_api``.
        repo_url: Repository URL; ``"/pulls"`` is appended to build the
            request URL.
        state: Sent as the ``state`` query parameter.
        base: Sent as the ``base`` query parameter when non-empty.
        sort: Sent as the ``sort`` query parameter.
        direction: Sent as the ``direction`` query parameter.
        per_page: Page size, clamped to the range 1-100. The special value
            ``0`` requests pages of 100 until a short page is returned, so
            every matching pull request is collected.
        since: Cutoff forwarded to ``_parse_pr``; it is applied client-side
            to each item and is not sent as a query parameter.
        head: Sent as the ``head`` query parameter when non-empty.

    Returns:
        The parsed pull requests, in the order the API returned them, minus
        any that ``_parse_pr`` rejected.

    Raises:
        ValueError: If the response body is not a JSON array.
    """
    api = get_api(credentials)
    pulls_url = repo_url + "/pulls"

    fetch_all = per_page == 0
    # Clamp so that a negative or oversized value never reaches the API.
    page_size = 100 if fetch_all else max(1, min(per_page, 100))

    params: dict[str, str] = {
        "state": state,
        "sort": sort,
        "direction": direction,
        "per_page": str(page_size),
    }
    if base:
        params["base"] = base
    if head:
        params["head"] = head

    pull_requests: list["GithubListPullRequestsBlock.Output.PRItem"] = []
    page = 1
    while True:
        if fetch_all:
            params["page"] = str(page)
        response = await api.get(pulls_url, params=params)
        data = response.json()
        if not isinstance(data, list):
            # Iterating a dict payload would hand its string keys to
            # `_parse_pr` and fail with an unrelated AttributeError.
            raise ValueError(
                "Unexpected response while listing pull requests: "
                f"expected a JSON array, got {type(data).__name__}"
            )

        for pr in data:
            item = GithubListPullRequestsBlock._parse_pr(pr, since)
            if item:
                pull_requests.append(item)

        # A single-page request stops after one fetch; auto-pagination stops
        # at the first page that is not full (which includes an empty page).
        if not fetch_all or len(data) < page_size:
            break
        page += 1

    return pull_requests

@staticmethod
def _parse_pr(
    pr: dict,
    since: str = "",
) -> Optional["GithubListPullRequestsBlock.Output.PRItem"]:
    """Convert a raw GitHub API pull request object into a ``PRItem``.

    Args:
        pr: One pull request object from the ``/pulls`` JSON response.
        since: Optional ISO 8601 cutoff such as ``2024-01-01T00:00:00Z``.
            A value without a UTC offset (including a date-only value) is
            interpreted as UTC. An empty string disables the filter.

    Returns:
        The parsed item, or ``None`` when ``since`` is set and the pull
        request was neither created nor updated at or after that time.

    Raises:
        ValueError: If ``since`` or one of the pull request's timestamps is
            not a valid ISO 8601 string.
    """
    from datetime import datetime, timezone

    def to_utc(value: str) -> datetime:
        """Parse an ISO 8601 string into a timezone-aware datetime."""
        text = value.strip()
        # `fromisoformat` only accepts a trailing "Z" from Python 3.11 on.
        if text.endswith(("Z", "z")):
            text = text[:-1] + "+00:00"
        parsed = datetime.fromisoformat(text)
        if parsed.tzinfo is None:
            # Naive and aware datetimes cannot be compared; assume UTC.
            parsed = parsed.replace(tzinfo=timezone.utc)
        return parsed

    created_at = pr.get("created_at") or ""
    updated_at = pr.get("updated_at") or ""

    if since:
        try:
            cutoff = to_utc(since)
        except ValueError as err:
            raise ValueError(
                f"Invalid 'since' value {since!r}: expected ISO 8601, "
                "e.g. 2024-01-01T00:00:00Z"
            ) from err
        # Compare parsed instants rather than raw strings: "...Z" and
        # "...+00:00" name the same moment but sort differently as text.
        # Both timestamps are considered because the filter is documented
        # as "created/updated at or after".
        stamps = [to_utc(stamp) for stamp in (created_at, updated_at) if stamp]
        if stamps and max(stamps) < cutoff:
            return None

    # GitHub can return `"user": null` (deleted/ghost accounts). The key is
    # then present, so `pr.get("user", {})` would yield None rather than the
    # default; `or {}` covers both the missing and the null case. The same
    # guard is applied to `base`, `head` and `labels`.
    return {
        "title": pr.get("title", ""),
        "url": pr.get("html_url", ""),
        "number": pr.get("number", 0),
        "state": pr.get("state", ""),
        "author": (pr.get("user") or {}).get("login", ""),
        "created_at": created_at,
        "updated_at": updated_at,
        "merged_at": pr.get("merged_at"),
        "base_branch": (pr.get("base") or {}).get("ref", ""),
        "head_branch": (pr.get("head") or {}).get("ref", ""),
        "labels": [label["name"] for label in pr.get("labels") or []],
        "draft": pr.get("draft", False),
    }

async def run(
    self,
    input_data: Input,
    *,
    credentials: GithubCredentials,
    **kwargs,
) -> BlockOutput:
    """List pull requests for the configured repository and emit them.

    Calls ``list_prs`` once with every filter, sort and paging field from
    ``input_data``.

    Yields:
        ``("pull_requests", items)`` with the full list, then one
        ``("pull_request", item)`` per entry. If anything raises, a single
        ``("error", message)`` is yielded instead of propagating.
    """
    # Block boundary: any failure is reported on the `error` output rather
    # than raised.
    try:
        pull_requests = await self.list_prs(
            credentials,
            input_data.repo_url,
            state=input_data.state,
            base=input_data.base,
            sort=input_data.sort,
            direction=input_data.direction,
            per_page=input_data.per_page,
            since=input_data.since,
            head=input_data.head,
        )
        yield "pull_requests", pull_requests
        for pr in pull_requests:
            yield "pull_request", pr
    except Exception as e:
        yield "error", str(e)

class GithubMakePullRequestBlock(Block):
class Input(BlockSchemaInput):
Expand Down Expand Up @@ -534,7 +712,7 @@ def __init__(self):
@staticmethod
async def list_reviewers(
credentials: GithubCredentials, pr_url: str
) -> list[Output.ReviewerItem]:
) -> list["GithubListPRReviewersBlock.Output.ReviewerItem"]:
api = get_api(credentials)
reviewers_url = prepare_pr_api_url(pr_url=pr_url, path="requested_reviewers")
response = await api.get(reviewers_url)
Expand Down Expand Up @@ -667,3 +845,4 @@ def prepare_pr_api_url(pr_url: str, path: str) -> str:

scheme, base_url, pr_number = match.groups()
return f"{scheme or 'https'}://{base_url}/pulls/{pr_number}/{path}"

Loading