Skip to content

Commit a1159d7

Browse files
Merge pull request #110 from smokeyScraper/contributor_recommendation_tool
[feat]: implement github contributor recommendation tool
2 parents ae8dfc1 + 48b8b27 commit a1159d7

File tree

10 files changed

+334
-7
lines changed

10 files changed

+334
-7
lines changed

backend/app/agents/devrel/github/github_toolkit.py

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@
66
from .prompts.intent_analysis import GITHUB_INTENT_ANALYSIS_PROMPT
77
from .tools.search import handle_web_search
88
# TODO: Implement all tools
9-
# from .tools.contributor_recommendation import handle_contributor_recommendation
9+
from .tools.contributor_recommendation import handle_contributor_recommendation
1010
# from .tools.repository_query import handle_repo_query
1111
# from .tools.issue_creation import handle_issue_creation
1212
# from .tools.documentation_generation import handle_documentation_generation
@@ -102,8 +102,7 @@ async def execute(self, query: str) -> Dict[str, Any]:
102102
logger.info(f"Executing {classification} for query")
103103

104104
if classification == "contributor_recommendation":
105-
result = "Not implemented"
106-
# result = await handle_contributor_recommendation(query)
105+
result = await handle_contributor_recommendation(query)
107106
elif classification == "repo_support":
108107
result = "Not implemented"
109108
# result = await handle_repo_query(query)
Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,20 @@
1+
# Prompt template asking an LLM for a contributor-search-oriented technical
# summary of a GitHub issue. The single placeholder `{issue_content}` is
# filled in with str.format by the issue summarizer.
ISSUE_SUMMARIZATION_PROMPT = """You are a technical analyst optimizing GitHub issues for contributor search.
2+
3+
Analyze the provided GitHub issue and create a technical summary optimized for finding relevant expert contributors.
4+
5+
Focus on:
6+
- Core technical problem or feature request
7+
- Specific technologies, frameworks, libraries, APIs mentioned
8+
- Technical skills and expertise required to solve this
9+
- Programming languages and tools involved
10+
- System components affected (frontend, backend, database, etc.)
11+
12+
Create a summary that reads like a job requirement for finding the right technical expert.
13+
14+
**GitHub Issue Content:**
15+
---
16+
{issue_content}
17+
---
18+
19+
**Optimized Technical Summary for Contributor Search:**
20+
"""
Lines changed: 37 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,37 @@
1+
# Prompt template that asks an LLM to turn a contributor-recommendation
# request into a single JSON object with the keys query_type, aligned_query,
# keywords and technical_domain. The only str.format placeholder is `{query}`;
# every literal JSON brace in the examples is doubled (`{{` / `}}`) so that
# str.format leaves it intact.
QUERY_ALIGNMENT_PROMPT = """Analyze this contributor recommendation request and process it for technical search:
2+
3+
USER REQUEST: {query}
4+
5+
Your task:
6+
1. Extract the core technical requirements
7+
2. Generate a clean, technical search query optimized for finding contributor profiles
8+
3. Extract specific keywords that would appear in developer profiles (languages, frameworks, tools, domains)
9+
10+
Guidelines:
11+
- aligned_query: Convert user request into clear technical language that matches how developers describe their skills
12+
- keywords: Extract 3-5 specific technical terms (React, Python, API, database, etc.)
13+
- Focus on technologies, not business requirements
14+
- Make it searchable against developer profiles and contribution history
15+
16+
Example transformations:
17+
18+
Input: "I need help with our Stripe payment integration issue"
19+
Output: {{"query_type": "general", "aligned_query": "developer with payment processing and Stripe API integration experience", "keywords": ["Stripe", "payment", "API", "integration"], "technical_domain": "backend"}}
20+
21+
Input: "Find experts for database optimization"
22+
Output: {{"query_type": "general", "aligned_query": "backend developer with database performance optimization experience", "keywords": ["database", "optimization", "performance", "SQL"], "technical_domain": "backend"}}
23+
24+
Input: "https://github.com/owner/repo/issues/123 - authentication bug"
25+
Output: {{"query_type": "github_issue", "aligned_query": "developer with authentication and security implementation experience", "keywords": ["authentication", "security", "OAuth", "JWT"], "technical_domain": "security"}}
26+
27+
IMPORTANT FORMATTING RULES:
28+
- DO NOT use markdown formatting
29+
- DO NOT wrap in code blocks (```)
30+
- DO NOT add any text before or after the JSON
31+
- DO NOT add explanations
32+
- Return EXACTLY this format: {{"query_type": "...", "aligned_query": "...", "keywords": [...], "technical_domain": "..."}}
33+
34+
Expected JSON schema:
35+
{{"query_type": "github_issue" | "general", "aligned_query": "clean technical search text", "keywords": ["keyword1", "keyword2"], "technical_domain": "frontend|backend|fullstack|ml|devops|mobile|security|other"}}
36+
37+
Return the JSON object only:"""

backend/app/agents/devrel/github/prompts/intent_analysis.py

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22
33
AVAILABLE FUNCTIONS:
44
- web_search: Search the web for information
5-
- contributor_recommendation: Finding the right people to review PRs, assign issues, or collaborate
5+
- contributor_recommendation: Finding the right people to review PRs, assign issues, or collaborate (supports both issue URLs and general queries)
66
- repo_support: Questions about codebase structure, dependencies, impact analysis, architecture
77
- issue_creation: Creating bug reports, feature requests, or tracking items
88
- documentation_generation: Generating docs, READMEs, API docs, guides, or explanations
@@ -12,7 +12,13 @@
1212
USER QUERY: {user_query}
1313
1414
Classification guidelines:
15-
- contributor_recommendation: Finding reviewers, assignees, collaborators
15+
- contributor_recommendation:
16+
* "who should review this PR/issue?"
17+
* "find experts in React/Python/ML"
18+
* "recommend assignees for stripe integration"
19+
* "best people for database optimization"
20+
* URLs like github.com/owner/repo/issues/123
21+
* "I need help with RabbitMQ, can you suggest some people?"
1622
- repo_support: Code structure, dependencies, impact analysis, architecture
1723
- issue_creation: Creating bugs, features, tracking items
1824
- documentation_generation: Docs, READMEs, guides, explanations
Lines changed: 169 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1 +1,170 @@
1+
import logging
2+
import re
3+
from typing import Any, Dict
4+
from urllib.parse import urlparse
5+
from langchain_core.messages import HumanMessage
6+
from langchain_google_genai import ChatGoogleGenerativeAI
17

8+
from app.core.config import settings
9+
from app.database.weaviate.operations import search_contributors
10+
from app.services.github.issue_processor import GitHubIssueProcessor
11+
from app.services.embedding_service.service import EmbeddingService
12+
from ..prompts.contributor_recommendation.query_alignment import QUERY_ALIGNMENT_PROMPT
13+
14+
logger = logging.getLogger(__name__)
15+
16+
class ContributorRecommendationWorkflow:
    """
    Contributor recommendation with proper query alignment for hybrid search.

    An LLM rewrites the raw user request (plus the content of a GitHub issue,
    when the request contains an issue URL) into a structured search spec:
    a cleaned-up technical query and a list of keywords.
    """

    # First GitHub issue URL appearing anywhere in a user request.
    _ISSUE_URL_PATTERN = re.compile(r'https?://github\.com/[\w-]+/[\w.-]+/issues/\d+')
    # A reply wrapped entirely in a Markdown code fence, e.g. ```json ... ```.
    _CODE_FENCE_PATTERN = re.compile(r'^```(?:json)?\s*(.*?)\s*```$', re.DOTALL | re.IGNORECASE)

    def __init__(self):
        self.query_alignment_llm = ChatGoogleGenerativeAI(
            model=settings.github_agent_model,
            temperature=0.1,
            google_api_key=settings.gemini_api_key
        )
        self.embedding_service = EmbeddingService()

    @staticmethod
    def _strip_code_fences(text: str) -> str:
        """
        Return ``text`` trimmed of surrounding whitespace and, if the whole
        reply is wrapped in a Markdown code fence, of that fence as well.

        The alignment prompt forbids code fences, but models do not always
        comply; unwrapping here keeps such replies parseable instead of
        silently degrading to the fallback result.
        """
        stripped = text.strip()
        fenced = ContributorRecommendationWorkflow._CODE_FENCE_PATTERN.match(stripped)
        return fenced.group(1) if fenced else stripped

    async def _align_user_request(self, query: str) -> Dict[str, Any]:
        """
        Align user request into optimized format for hybrid search.
        Extract clean technical query + keywords that match contributor profiles.

        Args:
            query: Raw user request; may contain a GitHub issue URL.

        Returns:
            The JSON object produced by the LLM. If the reply is not text, is
            not valid JSON, or is not a JSON object, a fallback dict is
            returned that carries the raw ``query`` as ``aligned_query`` and
            an empty keyword list.

        Raises:
            Exception: whatever ``_fetch_github_issue_content`` or the LLM
                call raises; only parsing problems are absorbed here.
        """
        # Function-scope import: the module's import block does not bring in json.
        import json

        logger.info("Aligning user request for hybrid search optimization")

        url_match = self._ISSUE_URL_PATTERN.search(query)
        if url_match:
            issue_content = await self._fetch_github_issue_content(url_match.group(0))
            full_query = f"{query}\n\nIssue content: {issue_content}"
        else:
            full_query = query

        prompt = QUERY_ALIGNMENT_PROMPT.format(query=full_query)
        response = await self.query_alignment_llm.ainvoke([HumanMessage(content=prompt)])
        # Debug-level log instead of print(): keeps raw model output off stdout.
        logger.debug("Raw alignment response: %r", response)

        try:
            raw = response.content
            if not isinstance(raw, str):
                raise ValueError("alignment response content is not text")
            result = json.loads(self._strip_code_fences(raw))
            if not isinstance(result, dict):
                # Valid JSON but not an object; callers rely on .get().
                raise ValueError("alignment response is not a JSON object")
        except ValueError:
            # json.JSONDecodeError is a ValueError subclass, so this covers both.
            logger.warning("Failed to parse alignment result, using fallback")
            return {
                "query_type": "general",
                "aligned_query": query,
                "keywords": [],
                "technical_domain": "other"
            }

        logger.info(f"Query aligned: '{result.get('aligned_query')}' with keywords: {result.get('keywords')}")
        return result

    async def _fetch_github_issue_content(self, github_url: str) -> str:
        """
        Fetch GitHub issue content.

        Args:
            github_url: URL whose path has the form ``/<owner>/<repo>/issues/<number>``.

        Returns:
            The value returned by ``GitHubIssueProcessor.fetch_issue_content``.

        Raises:
            ValueError: if the URL path is not an issue path.
            Exception: any error from the issue processor, logged and re-raised.
        """
        try:
            path_parts = urlparse(github_url).path.strip('/').split('/')
            if len(path_parts) < 4 or path_parts[2] != "issues":
                raise ValueError("Invalid GitHub issue URL")

            owner, repo = path_parts[0], path_parts[1]
            issue_number = int(path_parts[3])
            processor = GitHubIssueProcessor(owner, repo, issue_number)
            return await processor.fetch_issue_content()

        except Exception as e:
            logger.error(f"GitHub issue fetching failed: {e}")
            raise
80+
81+
async def handle_contributor_recommendation(query: str) -> Dict[str, Any]:
    """
    Main entry point with unified query processing.

    Aligns the request through ContributorRecommendationWorkflow, embeds the
    aligned text, runs the hybrid contributor search and formats the hits.

    Args:
        query: Raw user request (free text and/or a GitHub issue URL).

    Returns:
        On success, a dict with ``status`` "success", ``recommendations``
        (possibly empty), ``message``, ``search_query``, ``keywords_used`` and
        ``technical_domain``; when at least one contributor is found it also
        carries ``search_metadata``. On any failure, a dict with ``status``
        "error" and the exception text in ``message``. This function does not
        raise for errors occurring inside the workflow.
    """
    # Single source of truth for the search parameters, so the values reported
    # in search_metadata cannot drift from the ones actually used.
    vector_weight = 0.7   # Semantic similarity
    keyword_weight = 0.3  # Keyword matching
    max_results = 5

    logger.info(f"Processing contributor recommendation: {query[:100]}...")

    try:
        workflow = ContributorRecommendationWorkflow()

        alignment_result = await workflow._align_user_request(query)
        # `or` instead of a .get() default: the LLM may emit the key with a
        # null/empty value, which a default would not replace.
        search_text = alignment_result.get("aligned_query") or query
        keywords = alignment_result.get("keywords") or []
        technical_domain = alignment_result.get("technical_domain") or "other"

        logger.info("Generating embedding for semantic search")
        enhanced_search_text = f"Looking for contributor with expertise in: {search_text}"
        query_embedding = await workflow.embedding_service.get_embedding(enhanced_search_text)
        logger.info(f"Generated embedding with dimension: {len(query_embedding)}")

        logger.info("Performing hybrid search (semantic + keyword matching)")

        results = await search_contributors(
            query_embedding=query_embedding,
            keywords=keywords,
            limit=max_results,
            vector_weight=vector_weight,
            bm25_weight=keyword_weight
        )

        logger.info(f"Search complete: Found {len(results)} potential contributors")

        if not results:
            logger.info("No contributors found matching the search criteria")
            return {
                "status": "success",
                "recommendations": [],
                "message": "No suitable contributors found",
                "search_query": search_text,
                "keywords_used": keywords,
                "technical_domain": technical_domain
            }

        logger.info("Formatting recommendations with scores")
        recommendations = []
        for contributor in results:
            # `or` guards against keys that are present but null.
            languages = contributor.get('languages') or []
            topics = contributor.get('topics') or []
            hybrid_score = contributor.get('hybrid_score') or 0
            vector_score = contributor.get('vector_score') or 0
            bm25_score = contributor.get('bm25_score') or 0

            reason_parts = []
            if languages:
                reason_parts.append(f"Expert in {', '.join(languages)}")
            if topics:
                reason_parts.append(f"Active in {', '.join(topics)}")

            username = contributor.get("github_username")
            recommendations.append({
                "user": username,
                "reason": " • ".join(reason_parts) if reason_parts else "Strong technical match",
                "search_score": round(hybrid_score, 4),
                "vector_score": round(vector_score, 4),
                "keyword_score": round(bm25_score, 4),
                "languages": languages,
                "topics": topics
            })
            logger.info(
                f"@{username} (score: {hybrid_score:.4f}) - {reason_parts[0] if reason_parts else 'Technical match'}")

        logger.info(f"Successfully generated {len(recommendations)} contributor recommendations")

        return {
            "status": "success",
            "recommendations": recommendations,
            "message": f"Found {len(recommendations)} suitable contributors",
            "search_query": search_text,
            "keywords_used": keywords,
            "technical_domain": technical_domain,
            "search_metadata": {
                "total_candidates": len(results),
                "vector_weight": vector_weight,
                "keyword_weight": keyword_weight,
                "embedding_dimension": len(query_embedding)
            }
        }

    except Exception as e:
        # Boundary handler: the caller expects a result dict, never an exception.
        logger.exception(f"Error in contributor recommendation: {e}")
        return {"status": "error", "message": str(e)}

backend/app/agents/devrel/prompts/response_prompt.py

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -26,16 +26,25 @@
2626
- Avoid complex markdown formatting like **bold** or *italic*
2727
- Use plain text with clear line breaks
2828
- Format links as plain URLs: https://example.com
29-
- Use simple emojis for visual appeal: 🔗 📚 ⚡
29+
- Use simple emojis for visual appeal
3030
- Keep paragraphs short and scannable
3131
- Use "→" for arrows instead of markdown arrows
3232
33+
SPECIAL FORMATTING FOR CONTRIBUTOR RECOMMENDATIONS:
34+
If the task result contains contributor recommendations:
35+
- Start with "Found X Contributors"
36+
- Show search query used and keywords
37+
- For each contributor: "1. username (Score: X.XXX)"
38+
- Include their expertise/reason for recommendation
39+
- End with metadata about search and actionable guidance
40+
3341
Instructions:
3442
1. Synthesize all information - Use reasoning process, tool results, and task results together
3543
2. Address the user's needs - Focus on what they're trying to accomplish
3644
3. Be actionable - Provide specific steps, resources, or guidance
3745
4. Stay DevRel-focused - Be encouraging, helpful, and community-oriented
3846
5. Reference sources - Mention what you researched or considered when relevant
3947
6. Format for readability - Clean, simple text that displays well
48+
7. For contributor recommendations - Use the special formatting above to show scores and details
4049
4150
Create a helpful, comprehensive response:"""

backend/app/api/v1/auth.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22
from fastapi.responses import HTMLResponse
33
from app.database.supabase.client import get_supabase_client
44
from app.services.auth.verification import find_user_by_session_and_verify, get_verification_session_info
5-
from app.services.user.profiling import profile_user_from_github
5+
from app.services.github.user.profiling import profile_user_from_github
66
from typing import Optional
77
import logging
88
import asyncio
Lines changed: 83 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,83 @@
1+
import logging
2+
from typing import List
3+
from langchain_google_genai import ChatGoogleGenerativeAI
4+
from langchain_core.messages import HumanMessage
5+
6+
from app.core.config import settings
7+
from app.services.embedding_service.service import EmbeddingService
8+
from app.services.github.user.profiling import GitHubUserProfiler
9+
from app.agents.devrel.github.prompts.contributor_recommendation.issue_summarization import ISSUE_SUMMARIZATION_PROMPT
10+
11+
logger = logging.getLogger(__name__)
12+
13+
14+
class GitHubIssueProcessor:
    """
    A service to fetch, summarize, and embed a GitHub issue.
    """

    def __init__(self, owner: str, repo: str, issue_number: int):
        """
        Args:
            owner: Repository owner (user or organization login).
            repo: Repository name.
            issue_number: Number of the issue within the repository.
        """
        self.owner = owner
        self.repo = repo
        self.issue_number = issue_number
        self.summarizer_llm = ChatGoogleGenerativeAI(
            model=settings.github_agent_model,
            temperature=0.1,
            google_api_key=settings.gemini_api_key
        )
        self.embedding_service = EmbeddingService()

    @staticmethod
    def _format_issue_content(issue_data: dict, comments_data: List[dict]) -> str:
        """
        Join an issue's title, body and non-empty comments into one text blob.

        Args:
            issue_data: Issue payload; must contain ``title``. ``body`` may be
                missing or null (an issue opened without a description) and is
                then rendered as empty text rather than the literal "None".
            comments_data: Comment payloads. Comments with an empty body are
                skipped; a comment whose ``user`` is null is attributed to
                "unknown" instead of raising.

        Returns:
            The parts separated by a ``---`` rule on its own paragraph.
        """
        content_parts = [
            f"Title: {issue_data['title']}",
            f"Body: {issue_data.get('body') or ''}",
        ]

        for comment in comments_data:
            body = comment.get('body')
            if not body:
                continue
            author = (comment.get('user') or {}).get('login') or 'unknown'
            content_parts.append(f"Comment by {author}: {body}")

        return "\n\n---\n\n".join(content_parts)

    async def fetch_issue_content(self) -> str:
        """
        Fetches and consolidates all text content from a GitHub issue.

        Returns:
            Title, body and comments joined into a single string.

        Raises:
            ValueError: if the issue request yields no data.
        """
        logger.info(f"Fetching content for {self.owner}/{self.repo}#{self.issue_number}")
        async with GitHubUserProfiler() as profiler:
            issue_url = f"{profiler.base_url}/repos/{self.owner}/{self.repo}/issues/{self.issue_number}"
            comments_url = f"{issue_url}/comments"

            issue_data = await profiler.request(issue_url)
            if not issue_data:
                raise ValueError("Failed to fetch issue data.")

            # NOTE(review): only what a single profiler.request call returns is
            # used; if that is one API page, long threads are truncated - confirm.
            comments_data = await profiler.request(comments_url)

        return self._format_issue_content(issue_data, comments_data or [])

    async def _summarize_context(self, content: str) -> str:
        """Generates a technical summary of the issue content using an LLM."""
        logger.info(f"Summarizing issue content for {self.owner}/{self.repo}#{self.issue_number}")
        prompt = ISSUE_SUMMARIZATION_PROMPT.format(issue_content=content)
        response = await self.summarizer_llm.ainvoke([HumanMessage(content=prompt)])
        summary = response.content.strip()
        logger.info(f"Generated summary: {summary[:100]}")
        return summary

    async def get_embedding_for_issue(self) -> List[float]:
        """
        Orchestrates the entire process: fetch, summarize, and embed.
        Returns a vector embedding representing the issue.

        Raises:
            ValueError: if no issue content could be fetched.
            Exception: any error from fetching, summarizing or embedding,
                logged and re-raised unchanged.
        """
        try:
            content = await self.fetch_issue_content()
            if not content:
                raise ValueError("Failed to fetch issue content.")

            summary = await self._summarize_context(content)

            logger.info("Embedding issue summary")
            return await self.embedding_service.get_embedding(summary)
        except Exception as e:
            logger.error(f"Error processing issue {self.owner}/{self.repo}#{self.issue_number}: {str(e)}")
            # Bare raise re-raises the active exception without adding a frame.
            raise
File renamed without changes.

0 commit comments

Comments
 (0)