Skip to content

Commit 25328e5

Browse files
committed
few improvements
- percentage matches of both files are returned now. - new insight metrics - docs update
1 parent f71cab1 commit 25328e5

3 files changed

Lines changed: 31 additions & 13 deletions

File tree

demo.py

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -6,18 +6,20 @@
66
from dotenv import load_dotenv
77
load_dotenv()
88

9-
language = "python"
9+
language = "java"
1010
userid = os.environ["USER_ID"]
1111

1212

1313
moss = plagcheck.check(language, userid)
1414

15-
moss.addFilesByWildCard("testfiles/test_python*.py")
15+
moss.addFilesByWildCard("testfiles/test_java*.java")
1616

1717
# or moss.addFile("testfiles/test_python.py")
1818

1919
moss.submit()
2020

2121
print(moss.getHomePage())
2222
pprint.pprint(moss.getResults())
23-
pprint.pprint(moss.getInsights())
23+
pprint.pprint(moss.getShareScores())
24+
pprint.pprint(moss.getDistributors())
25+
pprint.pprint(moss.getCulprits())

docs/changelog.md

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,9 +5,11 @@
55

66
### Added
77
- New methods: `getHomePage()`, `submit()`, `getResults()`, `addBaseCode()`. See Documentation for usage.
8+
- Match percentages for both files of each pair are now returned (`percentage_file1` and `percentage_file2`).
89

910
### Changed
1011
- The plagcheck module is now more modularised. `check` is now a class.
12+
- `__get_line_numbers()` now runs in a worker thread (submitted to a `ThreadPoolExecutor`).
1113

1214
### Removed
1315
- `requests` as a dependency; network requests are now 50% faster.

plagcheck/plagcheck.py

Lines changed: 24 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -67,8 +67,8 @@ def __extract_info(self) -> Results:
6767
table = html.find("table")
6868
for row in table.find_all("tr")[1:]:
6969
col1, col2, col3 = row.find_all("td")
70-
filename1, perc = col1.text.strip().split()
71-
filename2, ____ = col2.text.strip().split()
70+
filename1, perc1 = col1.text.strip().split()
71+
filename2, perc2 = col2.text.strip().split()
7272

7373
with ThreadPoolExecutor() as executor:
7474
future = executor.submit(self.__get_line_numbers, col1.a.get("href"))
@@ -77,7 +77,8 @@ def __extract_info(self) -> Results:
7777
result_dict = Result(
7878
file1=filename1,
7979
file2=filename2,
80-
percentage=perc_str_to_int(perc),
80+
percentage_file1=perc_str_to_int(perc1),
81+
percentage_file2=perc_str_to_int(perc2),
8182
no_of_lines_matched=int(col3.text.strip()),
8283
lines_matched=lines,
8384
# lines_matched=self.__get_line_numbers(col1.a.get("href")),
@@ -103,8 +104,8 @@ def __get_line_numbers(self, url: str) -> List[List[str]]:
103104
list_of_line_nos.append(matched_lines)
104105
return list_of_line_nos
105106

106-
def addFilesByWildCard(self, file):
107-
self.__moss.addFilesByWildcard(file)
107+
def addFilesByWildCard(self, files):
108+
self.__moss.addFilesByWildcard(files)
108109

109110
def addFile(self, file):
110111
self.__moss.addFile(file)
@@ -118,32 +119,45 @@ def submit(self):
118119
url = self.__moss.send()
119120

120121
self.home_url = url
122+
self.moss_results = self.__extract_info()
121123

122124
def getHomePage(self):
123125
"""Return Moss Results HomePage URL"""
124126
return self.home_url
125127

126128
def getResults(self) -> Tuple[str, Results]:
127129
"""Return the result as a list of dictionary"""
128-
self.moss_results = self.__extract_info()
129130

130131
return self.moss_results
131132

132-
def getInsights(self):
133+
def getShareScores(self):
133134
"""Share Score Insights WIP"""
134135
similar_code_files = []
136+
culprits = []
135137
for result in self.moss_results:
136138
similar_code_files.append(result['file1'])
137139
similar_code_files.append(result['file2'])
138140

139-
# count of files which are similar
141+
# frequency of files which are similar
140142
share_score = collections.Counter(similar_code_files)
141143

142-
# code which has been similar to most of the files
144+
# code which is similar to most of the files
143145
distributor_score = max(share_score.values())
144146

145147
for key, value in share_score.items():
146148
if value == distributor_score:
147-
distributor = key
149+
self.__distributors = key
150+
else:
151+
culprits.append(key)
152+
153+
self.__culprits = culprits
148154

149155
return dict(share_score)
156+
157+
def getDistributors(self):
158+
"""Potential distributor who shared their code"""
159+
return self.__distributors
160+
161+
def getCulprits(self):
162+
"""Potential Culprits who copied the code"""
163+
return self.__culprits

0 commit comments

Comments
 (0)