-
-
Notifications
You must be signed in to change notification settings - Fork 307
Add metadata quality checker tool #5278
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,17 @@ | ||
| # Metadata Quality Checker Tool | ||
|
|
||
| A Python CLI tool that analyzes OWASP project metadata and reports | ||
| missing or low-quality fields affecting project discovery. | ||
|
|
||
| ## Checks Performed | ||
| - Tags presence and count | ||
| - Project type | ||
| - Difficulty level | ||
| - Pitch quality | ||
| - Repository URL | ||
| - Activity (last commit) | ||
|
|
||
| ## Usage | ||
|
|
||
| ```bash | ||
| python checker.py | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,47 @@ | ||
| import json | ||
| import sys | ||
| from pathlib import Path | ||
| from rules import check_rules | ||
| from score import calculate_score, get_status | ||
|
|
||
|
|
||
def load_metadata():
    """Load the project metadata list from a JSON file.

    The file path is taken from the first command-line argument
    (``python checker.py file.json``); when no argument is given, the
    bundled ``sample_metadata.json`` next to this script is used.

    Returns:
        The parsed JSON array (one entry per project).

    Exits:
        Prints a message and calls ``sys.exit(1)`` when the file is missing,
        cannot be read, is not valid JSON, or does not contain a top-level
        JSON array.
    """
    if len(sys.argv) > 1:
        metadata_path = Path(sys.argv[1])
    else:
        # Default to the sample file shipped alongside the script.
        metadata_path = Path(__file__).parent / "sample_metadata.json"

    # is_file() (rather than exists()) also rejects directories, which would
    # otherwise fail later inside open() with an unhandled error.
    if not metadata_path.is_file():
        print(f"❌ Metadata file not found: {metadata_path}")
        sys.exit(1)

    try:
        with open(metadata_path, "r", encoding="utf-8") as f:
            data = json.load(f)
    except OSError as exc:
        print(f"❌ Could not read metadata file: {exc}")
        sys.exit(1)
    except ValueError as exc:
        # json.JSONDecodeError and UnicodeDecodeError are both ValueError
        # subclasses, so malformed JSON and bad encodings land here.
        print(f"❌ Invalid JSON in metadata file: {exc}")
        sys.exit(1)

    # The caller iterates over the result and treats each item as a project,
    # so anything other than an array is rejected up front.
    if not isinstance(data, list):
        print("❌ Metadata file must contain a JSON array")
        sys.exit(1)

    return data
|
Comment on lines
+8
to
+21
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Add JSON error handling and clarify CLI interface. Issues identified:
import argparse
def load_metadata():
parser = argparse.ArgumentParser(description="OWASP Metadata Quality Checker")
parser.add_argument("--file", type=str, help="Path to metadata JSON file")
args = parser.parse_args()
if args.file:
metadata_path = Path(args.file)
else:
metadata_path = Path(__file__).parent / "sample_metadata.json"
# ... rest of logic
with open(metadata_path, "r", encoding="utf-8") as f:
- return json.load(f)
+ try:
+ data = json.load(f)
+ if not isinstance(data, list):
+ print(f"❌ Metadata file must contain a JSON array")
+ sys.exit(1)
+ return data
+ except json.JSONDecodeError as e:
+ print(f"❌ Invalid JSON in metadata file: {e}")
+ sys.exit(1)🤖 Prompt for AI Agents |
||
|
|
||
|
|
||
def main():
    """Print a quality report for every project in the loaded metadata.

    For each project the rule issues, numeric score and status label are
    computed, then written to stdout as a short block.
    """
    projects = load_metadata()

    print("\n🔍 OWASP Metadata Quality Report\n")

    for entry in projects:
        problems = check_rules(entry)
        points = calculate_score(entry)
        label = get_status(points)

        print(f"📦 Project: {entry.get('name', 'Unnamed Project')}")
        print(f"📊 Score: {points}/100 ({label})")

        if problems:
            for problem in problems:
                print(f"❌ {problem}")
            # Blank separator line after the list of issues.
            print("")
        else:
            print("✅ No issues found\n")


if __name__ == "__main__":
    main()
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,38 @@ | ||
| from datetime import datetime, timedelta | ||
|
|
||
|
|
||
def check_rules(project: dict) -> list[str]:
    """Return the metadata quality issues found in *project*.

    Checks, in order: name, tags (a list with at least two entries), type,
    difficulty level, pitch (a string of at least 30 characters), repository
    URL, and activity (``last_commit`` within the last 365 days).

    Args:
        project: One project's metadata mapping.

    Returns:
        A list of human-readable issue descriptions; empty when every
        check passes.
    """
    issues = []

    if not project.get("name"):
        issues.append("Missing project name")

    tags = project.get("tags", [])
    if not isinstance(tags, list) or len(tags) < 2:
        issues.append("Missing or insufficient tags")

    if not project.get("type"):
        issues.append("Missing project type")

    if not project.get("level"):
        issues.append("Missing difficulty level")

    pitch = project.get("pitch", "")
    # A non-string pitch (e.g. a number) is reported instead of crashing len().
    if not isinstance(pitch, str) or len(pitch) < 30:
        issues.append("Pitch too short or missing")

    if not project.get("repo_url"):
        issues.append("Missing repository URL")

    last_commit = project.get("last_commit")
    if not last_commit:
        issues.append("Missing activity data")
    else:
        # fromisoformat() only accepts a trailing "Z" from Python 3.11 on;
        # rewrite it as an explicit UTC offset so older versions parse it too.
        if isinstance(last_commit, str) and last_commit.endswith("Z"):
            last_commit = last_commit[:-1] + "+00:00"
        try:
            commit_date = datetime.fromisoformat(last_commit)
        except (TypeError, ValueError):
            # TypeError covers non-string values such as numbers.
            issues.append("Invalid last_commit date format")
        else:
            # Comparing aware and naive datetimes raises TypeError, so build
            # "now" with the same tzinfo as the parsed value (None -> naive).
            now = datetime.now(commit_date.tzinfo)
            if commit_date < now - timedelta(days=365):
                issues.append("Project inactive (no commits in last 12 months)")

    return issues
|
Comment on lines
+27
to
+36
Contributor
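There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Timezone-aware datetime comparison may cause TypeError. Line 31 compares `commit_date`, which is timezone-aware whenever the ISO string carries a UTC offset, with the naive `datetime.now()`; Python raises TypeError for such a comparison, and the surrounding `except ValueError` does not catch it. Apply this diff to handle both timezone-aware and naive datetimes: try: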
commit_date = datetime.fromisoformat(last_commit)
+ # Make comparison timezone-aware if needed
+ now = datetime.now(commit_date.tzinfo) if commit_date.tzinfo else datetime.now()
- if commit_date < datetime.now() - timedelta(days=365):
+ if commit_date < now - timedelta(days=365):
issues.append("Project inactive (no commits in last 12 months)")Alternatively, standardize on UTC: +from datetime import datetime, timedelta, timezone
...
try:
commit_date = datetime.fromisoformat(last_commit)
+ # Convert to UTC for comparison
+ if commit_date.tzinfo is None:
+ commit_date = commit_date.replace(tzinfo=timezone.utc)
- if commit_date < datetime.now() - timedelta(days=365):
+ if commit_date < datetime.now(timezone.utc) - timedelta(days=365):
issues.append("Project inactive (no commits in last 12 months)") |
||
|
|
||
| return issues | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,20 @@ | ||
| [ | ||
| { | ||
| "name": "OWASP Example Tool", | ||
| "tags": [], | ||
| "type": "tool", | ||
| "level": "", | ||
| "pitch": "Short", | ||
| "repo_url": "https://github.com/owasp/example", | ||
| "last_commit": "2022-01-10" | ||
| }, | ||
| { | ||
| "name": "OWASP Juice Shop", | ||
| "tags": ["appsec", "training", "nodejs"], | ||
| "type": "training", | ||
| "level": "beginner", | ||
| "pitch": "An intentionally insecure web application for security training.", | ||
| "repo_url": "https://github.com/juice-shop/juice-shop", | ||
| "last_commit": "2025-11-20" | ||
| } | ||
| ] |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,32 @@ | ||
def calculate_score(project: dict) -> int:
    """Compute a 0-100 metadata completeness score for *project*.

    Points are awarded per field: name 10, tags (list of two or more) 25,
    type 15, level 15, pitch (30+ characters) 20, last_commit 15.
    """
    tag_list = project.get("tags")
    pitch_text = project.get("pitch", "")

    # (weight, earned) pairs; the total is the sum of the earned weights.
    weighted_checks = (
        (10, bool(project.get("name"))),
        (25, isinstance(tag_list, list) and len(tag_list) >= 2),
        (15, bool(project.get("type"))),
        (15, bool(project.get("level"))),
        (20, bool(pitch_text and len(pitch_text) >= 30)),
        (15, bool(project.get("last_commit"))),
    )
    return sum(weight for weight, earned in weighted_checks if earned)
|
|
||
|
|
||
def get_status(score: int) -> str:
    """Map a numeric score to its status label.

    80 and above is "good", 50 to 79 is "needs improvement", and anything
    lower is "poor".
    """
    # Thresholds are checked from highest to lowest; the first match wins.
    for minimum, label in ((80, "good"), (50, "needs improvement")):
        if score >= minimum:
            return label
    return "poor"
|
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
🛠️ Refactor suggestion | 🟠 Major
Usage documentation is incomplete.
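The usage section doesn't document the CLI argument for specifying a custom metadata file. Based on the implementation in checker.py (lines 10–11), update the documentation to show both the default invocation (`python checker.py`) and a run with a custom file (`python checker.py path/to/metadata.json`).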
In metadata_quality_checker/README.md around lines 14 to 17, the Usage section
omits the CLI positional argument for specifying a custom metadata file and
mismatches the PR note about a --file flag; update the README to show both
examples: running with the default sample (python checker.py) and running with a
custom metadata file path (python checker.py path/to/metadata.json), and add a
short note that the script currently accepts a positional file argument (not a
--file flag) so maintainers can decide whether to change the implementation to
accept a --file/--path option instead of a positional argument.