Skip to content
This repository was archived by the owner on Jan 14, 2020. It is now read-only.

Commit 723676e

Browse files
2 parents 57bd45d + 4c4c823 commit 723676e

File tree

2 files changed

+1623
-0
lines changed

2 files changed

+1623
-0
lines changed

python37/5.1. Issue_Textual_Content_Collection.py

Lines changed: 94 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,96 @@
1+
<<<<<<< HEAD
2+
import os

import pymongo
import requests
import urllib3

import Basic_Functions as bfs
6+
7+
# The GitHub requests in this script are sent with verify=False, which makes
# urllib3 emit an InsecureRequestWarning per call; silence that warning.
urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)

# MongoDB construction
# Local MongoDB instance; the "msr14" database holds the "issues" collection
# queried by get_issue_data.
mg_client = pymongo.MongoClient("mongodb://localhost:27017/")
mg_db = mg_client["msr14"]

# GitHub API oAuth
# Credentials must never be committed to the repository. They are read from
# the environment instead; export GITHUB_CLIENT_ID and GITHUB_CLIENT_SECRET
# before running the script. Both default to an empty string when unset.
CLIENT_ID = os.environ.get("GITHUB_CLIENT_ID", "")
CLIENT_SECRET = os.environ.get("GITHUB_CLIENT_SECRET", "")

# Repository whose issues are collected, and the label names that mark an
# issue as suitable for first-time contributors.
OWNER = "symfony"
REPO = "symfony"
FIRST_TIMER_LABELS = ["Easy Pick"]
20+
21+
22+
# ==============Functions==============
23+
def get_issue_data(owner="symfony", repo="symfony", required_labels=None):
    """Collect the text of locally stored issues carrying a required label.

    Issues are read from the ``issues`` collection of the module-level
    ``mg_db`` database, filtered by ``owner`` and ``repo``. Every issue that
    has at least one label whose name is in ``required_labels`` is recorded
    together with its comments and related commit messages.

    Args:
        owner: Value matched against the ``owner`` field of stored issues.
        repo: Value matched against the ``repo`` field of stored issues.
        required_labels: Collection of label names; an issue is kept when any
            of its labels is in it. Defaults to ``["good first issue"]``.

    Returns:
        dict: Maps each selected issue's ``id`` to a dict with the keys
        ``title``, ``body``, ``comments`` and ``commits``. Double quotes are
        stripped from the title and body.
    """
    # A None sentinel avoids sharing one mutable default list between calls.
    if required_labels is None:
        required_labels = ["good first issue"]

    dataset = {}

    # ============ Local Search ============
    issues = mg_db["issues"].find({"repo": repo, "owner": owner}, {"_id": 0})

    for issue in issues:
        # Tolerate documents whose "labels" field is missing or None.
        labels = issue.get("labels") or []
        # any() stops at the first matching label.
        if not any(label["name"] in required_labels for label in labels):
            continue

        # NOTE: the two search helpers query the module-level OWNER/REPO, not
        # the owner/repo arguments of this function.
        dataset[issue["id"]] = {
            "title": str(issue["title"]).replace("\"", ""),
            "body": str(issue["body"]).replace("\"", ""),
            "comments": search_issue_comments(issue["number"]),
            "commits": search_issue_commit(issue["number"]),
        }

    return dataset
43+
44+
45+
def search_issue_comments(issue_number=0):
    """Fetch the comment bodies of one issue from the GitHub REST API.

    The repository is taken from the module-level ``OWNER`` and ``REPO``.

    Args:
        issue_number: Number of the issue whose comments are requested.

    Returns:
        list[str]: The body of each returned comment with double quotes
        removed. Empty when the API answers with something other than a list
        (for example an error object).
    """
    issue_comments_url = "https://api.github.com/repos/{}/{}/issues/{}/comments".format(
        OWNER, REPO, issue_number)
    # Progress trace; the URL no longer carries the client secret.
    print(issue_comments_url)

    # GitHub no longer accepts client_id/client_secret as query parameters;
    # OAuth application credentials are sent with HTTP basic auth instead.
    auth = (CLIENT_ID, CLIENT_SECRET) if CLIENT_ID and CLIENT_SECRET else None
    # The timeout keeps a stalled connection from hanging the whole run.
    issue_comments = requests.get(
        issue_comments_url, auth=auth, verify=False, timeout=30).json()

    # Errors (rate limit, not found, ...) come back as a JSON object rather
    # than a list; iterating one would yield its keys and fail on ["body"].
    if not isinstance(issue_comments, list):
        return []

    return [str(comment["body"]).replace("\"", "") for comment in issue_comments]
57+
58+
59+
def search_issue_commit(issue_number=0):
    """Collect commit messages related to one issue number via the GitHub API.

    Two sources are combined, in this order:

    1. the commits of the pull request with the same number, if there is one;
    2. commits referenced from the issue's events (``event == "referenced"``).

    The repository is taken from the module-level ``OWNER`` and ``REPO``.

    Args:
        issue_number: Issue (or pull request) number to look up.

    Returns:
        list[str]: Commit messages with double quotes removed. Sources that
        answer with an error object contribute nothing.
    """
    # GitHub no longer accepts client_id/client_secret as query parameters;
    # OAuth application credentials are sent with HTTP basic auth instead.
    auth = (CLIENT_ID, CLIENT_SECRET) if CLIENT_ID and CLIENT_SECRET else None

    def fetch_json(url):
        # Trace the request, then return the decoded JSON payload. The timeout
        # keeps a stalled connection from hanging the whole run.
        print(url)
        return requests.get(url, auth=auth, verify=False, timeout=30).json()

    repo_url = "https://api.github.com/repos/{}/{}".format(OWNER, REPO)
    messages = []

    # A number that is not a pull request yields an error object (a dict with
    # "documentation_url") instead of a list of commits.
    commits = fetch_json("{}/pulls/{}/commits".format(repo_url, issue_number))
    if isinstance(commits, list):
        for commit in commits:
            messages.append(str(commit["commit"]["message"]).replace("\"", ""))

    # Get referenced commits
    issue_events = fetch_json("{}/issues/{}/events".format(repo_url, issue_number))
    if not isinstance(issue_events, list):
        return messages

    for issue_event in issue_events:
        if issue_event["event"] != "referenced":
            continue
        # The commit URL may be null; skip instead of requesting "None".
        commit_url = issue_event.get("commit_url")
        if not commit_url:
            continue
        issue_reference = fetch_json(commit_url)
        # Only a full commit object (with "url" and "files") is accepted.
        if (isinstance(issue_reference, dict)
                and "url" in issue_reference
                and "files" in issue_reference):
            messages.append(str(issue_reference["commit"]["message"]).replace("\"", ""))

    return messages
85+
86+
87+
# ==============Main==============
# Build the dataset for the configured repository and first-timer labels.
dataset_master = get_issue_data(
    owner=OWNER,
    repo=REPO,
    required_labels=FIRST_TIMER_LABELS,
)

# Persist the collected text through the project's JSON helper.
bfs.writeJsonFile(
    data=dataset_master,
    name="issues_text_{}".format(REPO),
    folder="data/issue_text",
)

# Report how many issues were collected.
print(len(dataset_master))
93+
=======
194
import pymongo
295
import Basic_Functions as bfs
396

@@ -40,3 +133,4 @@ def getIssueData(owner="symfony", repo="symfony", required_labels=["good first i
40133
bfs.writeJsonFile(data=dataset_master, name="issues_text_{}".format(REPO), folder="data/issue_text")
41134

42135
#print(len(dataset_master))
136+
>>>>>>> d9bb0b72f761538bd084f26a41d91dfec0d96e39

0 commit comments

Comments
 (0)