|
| 1 | +<<<<<<< HEAD |
| 2 | +import pymongo |
| 3 | +import urllib3 |
| 4 | +import requests |
| 5 | +import Basic_Functions as bfs |
| 6 | + |
import os

# Silence the InsecureRequestWarning that urllib3 would otherwise emit for the
# requests in this file that are made with verify=False.
urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)

# MongoDB construction
mg_client = pymongo.MongoClient("mongodb://localhost:27017/")
mg_db = mg_client["msr14"]

# GitHub API oAuth
# The credentials can be supplied through the GITHUB_CLIENT_ID and
# GITHUB_CLIENT_SECRET environment variables. The literals are kept only as a
# backward-compatible fallback.
# NOTE(review): a secret committed to source control should be rotated and
# the fallback removed once every caller sets the environment variables.
CLIENT_ID = os.environ.get("GITHUB_CLIENT_ID", "6aca4b66775c629cbafd")
CLIENT_SECRET = os.environ.get(
    "GITHUB_CLIENT_SECRET", "2349a0a9266e81f9f4d7df62ca49a98ca357b20c")

# Repository whose issues are exported, and the label names that mark an issue
# as suitable for first-time contributors.
OWNER = "symfony"
REPO = "symfony"
FIRST_TIMER_LABELS = ["Easy Pick"]
| 20 | + |
| 21 | + |
| 22 | +# ==============Functions============== |
def get_issue_data(owner="symfony", repo="symfony", required_labels=("good first issue",)):
    """Collect the text attached to every issue carrying a required label.

    Issues are read from the local ``issues`` MongoDB collection, filtered by
    ``owner`` and ``repo``. An issue is kept when at least one of its labels
    has a name contained in ``required_labels``.

    NOTE: comments and commit messages are fetched with
    search_issue_comments() and search_issue_commit(), which build their
    request URLs from the module-level OWNER and REPO, not from the ``owner``
    and ``repo`` arguments given here.

    Args:
        owner: Value matched against the ``owner`` field of stored issues.
        repo: Value matched against the ``repo`` field of stored issues.
        required_labels: Collection of label names; one match is enough.

    Returns:
        A dict keyed by issue id. Each value is a dict with the keys
        ``title``, ``body``, ``comments`` and ``commits``; double quotes are
        stripped from the title and body text.
    """
    dataset = {}

    # ============ Local Search ============
    issues = mg_db["issues"].find({"repo": repo, "owner": owner}, {"_id": 0})

    for issue in issues:
        # A document without labels (missing or null) simply never matches.
        labels = issue.get("labels") or []
        if not any(label["name"] in required_labels for label in labels):
            continue

        dataset[issue["id"]] = {
            "title": str(issue["title"]).replace("\"", ""),
            "body": str(issue["body"]).replace("\"", ""),
            "comments": search_issue_comments(issue["number"]),
            "commits": search_issue_commit(issue["number"]),
        }

    return dataset
| 43 | + |
| 44 | + |
def search_issue_comments(issue_number=0):
    """Fetch the comment bodies of one issue from the GitHub API.

    The request targets the repository named by the module-level OWNER and
    REPO and is authenticated with CLIENT_ID / CLIENT_SECRET as query
    parameters. Only the single response returned by this one request is
    processed.

    NOTE(review): the printed URL contains the client secret, and the request
    is sent with certificate verification disabled (verify=False).

    Args:
        issue_number: Number of the issue whose comments are requested.

    Returns:
        A list with one string per comment, double quotes removed. The list
        is empty when the reply is not a list of comments (for example an
        error object).
    """
    issue_comments_url = "https://api.github.com/repos/{}/{}/issues/{}/comments?client_id={}&client_secret={}".format(
        OWNER, REPO, issue_number, CLIENT_ID, CLIENT_SECRET)
    print(issue_comments_url)
    issue_comments = requests.get(issue_comments_url, verify=False).json()

    # An error reply is a JSON object, not a list; iterating it would yield
    # its keys and fail on comment["body"].
    if not isinstance(issue_comments, list):
        return []

    return [str(comment["body"]).replace("\"", "") for comment in issue_comments]
| 57 | + |
| 58 | + |
def search_issue_commit(issue_number=0):
    """Fetch the commit messages associated with one issue.

    Two sources are combined, in this order:

    1. the commits of the pull request that has the same number, and
    2. the commits named by "referenced" events of the issue.

    All requests target the repository named by the module-level OWNER and
    REPO and are authenticated with CLIENT_ID / CLIENT_SECRET as query
    parameters.

    NOTE(review): the printed URLs contain the client secret, and the
    requests are sent with certificate verification disabled (verify=False).

    Args:
        issue_number: Number of the issue (and candidate pull request).

    Returns:
        A list of commit message strings with double quotes removed. A
        source whose reply has an unexpected shape contributes nothing.
    """
    messages = []

    commits_url = "https://api.github.com/repos/{}/{}/pulls/{}/commits?client_id={}&client_secret={}".format(
        OWNER, REPO, issue_number, CLIENT_ID, CLIENT_SECRET)
    print(commits_url)
    commits = requests.get(commits_url, verify=False).json()
    # An error reply is a JSON object (it carries "documentation_url"), so
    # only a list is treated as commit data.
    if isinstance(commits, list):
        for commit in commits:
            messages.append(str(commit["commit"]["message"]).replace("\"", ""))

    # Get referenced commits
    issue_event_url = "https://api.github.com/repos/{}/{}/issues/{}/events?client_id={}&client_secret={}".format(
        OWNER, REPO, issue_number, CLIENT_ID, CLIENT_SECRET)
    print(issue_event_url)
    issue_events = requests.get(issue_event_url, verify=False).json()
    # Same guard as above: an error object must not be iterated as events.
    if not isinstance(issue_events, list):
        return messages

    for issue_event in issue_events:
        if issue_event.get("event") != "referenced":
            continue
        commit_url = issue_event.get("commit_url")
        # Without a commit URL the formatted address would start with "None",
        # which is not a valid request URL.
        if not commit_url:
            continue

        reference_url = "{}?client_id={}&client_secret={}".format(
            commit_url, CLIENT_ID, CLIENT_SECRET)
        print(reference_url)
        issue_reference = requests.get(reference_url, verify=False).json()
        # Keep the message only when the reply has both "url" and "files".
        if (isinstance(issue_reference, dict)
                and "url" in issue_reference
                and "files" in issue_reference):
            messages.append(str(issue_reference["commit"]["message"]).replace("\"", ""))

    return messages
| 85 | + |
| 86 | + |
# ==============Main==============
# Build the dataset for the configured repository and its first-timer labels.
dataset_master = get_issue_data(OWNER, REPO, FIRST_TIMER_LABELS)

# Persist the result as JSON through the project helper.
bfs.writeJsonFile(
    data=dataset_master,
    name="issues_text_{}".format(REPO),
    folder="data/issue_text",
)

# Report how many issues were collected.
print(len(dataset_master))
| 93 | +======= |
1 | 94 | import pymongo
|
2 | 95 | import Basic_Functions as bfs
|
3 | 96 |
|
@@ -40,3 +133,4 @@ def getIssueData(owner="symfony", repo="symfony", required_labels=["good first i
|
40 | 133 | bfs.writeJsonFile(data=dataset_master, name="issues_text_{}".format(REPO), folder="data/issue_text")
|
41 | 134 |
|
42 | 135 | #print(len(dataset_master))
|
| 136 | +>>>>>>> d9bb0b72f761538bd084f26a41d91dfec0d96e39 |
0 commit comments