Skip to content

Commit

Permalink
fix+feat: adds the resulting percentage of similarity by source.
Browse files Browse the repository at this point in the history
- Simplifies getting paths from the configuration file;
- Adds the resulting percentage of similarity by source.

Refs: #190
  • Loading branch information
Artanias authored Jul 4, 2024
1 parent 2ccccc7 commit d973ad5
Show file tree
Hide file tree
Showing 8 changed files with 155 additions and 70 deletions.
2 changes: 1 addition & 1 deletion Makefile
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
UTIL_VERSION := 0.4.7
UTIL_VERSION := 0.4.8
UTIL_NAME := codeplag
PWD := $(shell pwd)

Expand Down
20 changes: 12 additions & 8 deletions locales/codeplag.pot
Original file line number Diff line number Diff line change
Expand Up @@ -6,16 +6,16 @@
#, fuzzy
msgid ""
msgstr ""
"Project-Id-Version: codeplag 0.4.6\n"
"Project-Id-Version: codeplag 0.4.8\n"
"Report-Msgid-Bugs-To: EMAIL@ADDRESS\n"
"POT-Creation-Date: 2024-06-02 20:54+0300\n"
"POT-Creation-Date: 2024-06-30 12:31+0300\n"
"PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n"
"Last-Translator: Artyom Semidolin\n"
"Language-Team: LANGUAGE <LL@li.org>\n"
"MIME-Version: 1.0\n"
"Content-Type: text/plain; charset=utf-8\n"
"Content-Transfer-Encoding: 8bit\n"
"Generated-By: Babel 2.14.0\n"
"Generated-By: Babel 2.15.0\n"

#: src/codeplag/codeplagcli.py:44
msgid "You cannot specify the same value multiple times. You provided '{values}'."
Expand Down Expand Up @@ -262,7 +262,7 @@ msgid "Part of the second program"
msgstr ""

#: src/templates/general.templ:95 src/templates/general.templ:126
#: src/templates/sources.templ:61
#: src/templates/sources.templ:63
msgid "Similarity"
msgstr ""

Expand All @@ -278,19 +278,23 @@ msgstr ""
msgid "The path to the file being checked"
msgstr ""

#: src/templates/sources.templ:56
msgid "Similar parts of the programs"
#: src/templates/sources.templ:49
msgid "Total maximum compliance, %"
msgstr ""

#: src/templates/sources.templ:58
msgid "Similar parts of the programs"
msgstr ""

#: src/templates/sources.templ:60
msgid "The name of a similar file"
msgstr ""

#: src/templates/sources.templ:59
#: src/templates/sources.templ:61
msgid "Part of the file that is being checked"
msgstr ""

#: src/templates/sources.templ:60
#: src/templates/sources.templ:62
msgid "Part of a similar file"
msgstr ""

19 changes: 11 additions & 8 deletions locales/translations/en/LC_MESSAGES/codeplag.po
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
#
msgid ""
msgstr ""
"Project-Id-Version: codeplag 0.4.5\n"
"Project-Id-Version: codeplag 0.4.8\n"
"Report-Msgid-Bugs-To: EMAIL@ADDRESS\n"
"POT-Creation-Date: 2024-05-21 09:28+0300\n"
"PO-Revision-Date: 2024-05-16 19:15+0300\n"
Expand All @@ -16,7 +16,7 @@ msgstr ""
"MIME-Version: 1.0\n"
"Content-Type: text/plain; charset=utf-8\n"
"Content-Transfer-Encoding: 8bit\n"
"Generated-By: Babel 2.14.0\n"
"Generated-By: Babel 2.15.0\n"

#: src/codeplag/codeplagcli.py:44
msgid "You cannot specify the same value multiple times. You provided '{values}'."
Expand Down Expand Up @@ -279,7 +279,7 @@ msgid "Part of the second program"
msgstr "Part of the second program"

#: src/templates/general.templ:95 src/templates/general.templ:126
#: src/templates/sources.templ:61
#: src/templates/sources.templ:63
msgid "Similarity"
msgstr "Similarity"

Expand All @@ -295,19 +295,22 @@ msgstr "Verification results"
msgid "The path to the file being checked"
msgstr "The path to the file being checked"

#: src/templates/sources.templ:56
#: src/templates/sources.templ:49
msgid "Total maximum compliance, %"
msgstr "Total maximum compliance, %"

#: src/templates/sources.templ:58
msgid "Similar parts of the programs"
msgstr "Similar parts of the programs"

#: src/templates/sources.templ:58
#: src/templates/sources.templ:60
msgid "The name of a similar file"
msgstr "The name of a similar file"

#: src/templates/sources.templ:59
#: src/templates/sources.templ:61
msgid "Part of the file that is being checked"
msgstr "Part of the file that is being checked"

#: src/templates/sources.templ:60
#: src/templates/sources.templ:62
msgid "Part of a similar file"
msgstr "Part of a similar file"

25 changes: 14 additions & 11 deletions locales/translations/ru/LC_MESSAGES/codeplag.po
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
#
msgid ""
msgstr ""
"Project-Id-Version: codeplag 0.4.5\n"
"Project-Id-Version: codeplag 0.4.8\n"
"Report-Msgid-Bugs-To: EMAIL@ADDRESS\n"
"POT-Creation-Date: 2024-05-21 09:28+0300\n"
"PO-Revision-Date: 2024-05-11 12:05+0300\n"
Expand All @@ -17,7 +17,7 @@ msgstr ""
"MIME-Version: 1.0\n"
"Content-Type: text/plain; charset=utf-8\n"
"Content-Transfer-Encoding: 8bit\n"
"Generated-By: Babel 2.14.0\n"
"Generated-By: Babel 2.15.0\n"

#: src/codeplag/codeplagcli.py:44
msgid "You cannot specify the same value multiple times. You provided '{values}'."
Expand Down Expand Up @@ -261,15 +261,15 @@ msgstr "Коэффициент Жаккара"

#: src/templates/general.templ:71
msgid "Similarity of operators"
msgstr "Схожеть операторов"
msgstr "Схожесть операторов"

#: src/templates/general.templ:75
msgid "Similarity of keywords"
msgstr "Схожеть ключевых слов"
msgstr "Схожесть ключевых слов"

#: src/templates/general.templ:79
msgid "Similarity of literals"
msgstr "Схожеть литералов"
msgstr "Схожесть литералов"

#: src/templates/general.templ:82
msgid "Weighted average by fast metrics"
Expand All @@ -292,7 +292,7 @@ msgid "Part of the second program"
msgstr "Часть второй программы"

#: src/templates/general.templ:95 src/templates/general.templ:126
#: src/templates/sources.templ:61
#: src/templates/sources.templ:63
msgid "Similarity"
msgstr "Схожесть"

Expand All @@ -308,19 +308,22 @@ msgstr "Результаты проверки"
msgid "The path to the file being checked"
msgstr "Путь до проверяемого файла"

#: src/templates/sources.templ:56
#: src/templates/sources.templ:49
msgid "Total maximum compliance, %"
msgstr "Суммарное максимальное соответствие, %"

#: src/templates/sources.templ:58
msgid "Similar parts of the programs"
msgstr "Схожие части программ"

#: src/templates/sources.templ:58
#: src/templates/sources.templ:60
msgid "The name of a similar file"
msgstr "Имя схожего файла"

#: src/templates/sources.templ:59
#: src/templates/sources.templ:61
msgid "Part of the file that is being checked"
msgstr "Часть проверяемого файла"

#: src/templates/sources.templ:60
#: src/templates/sources.templ:62
msgid "Part of a similar file"
msgstr "Часть схожего файла"

14 changes: 3 additions & 11 deletions src/codeplag/config.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,6 @@
import json
from pathlib import Path
from typing import Any, ForwardRef, Literal, Mapping, overload

from typing_extensions import NotRequired
from typing import Any, Literal, Mapping, overload

from codeplag.consts import (
CONFIG_PATH,
Expand Down Expand Up @@ -59,19 +57,13 @@ def read_settings_conf() -> Settings:
)
return DefaultSettingsConfig

for key, key_type in Settings.__annotations__.items():
for key in Settings.__annotations__:
if key not in loaded_settings_config:
if key in DefaultSettingsConfig:
loaded_settings_config[key] = DefaultSettingsConfig[key]
continue

if key_type in [
Path,
NotRequired[Path], # type: ignore
# Hook for proper work of the Cythonized version.
ForwardRef("Path"),
ForwardRef("NotRequired[Path]"),
]:
if key in ["environment", "reports"]:
loaded_settings_config[key] = Path(loaded_settings_config[key])

return Settings(
Expand Down
77 changes: 46 additions & 31 deletions src/codeplag/handlers/report.py
Original file line number Diff line number Diff line change
Expand Up @@ -183,50 +183,63 @@ def _get_parsed_line(
class Elements(TypedDict):
    """Similarity data stored for one (checked work, compared work) pair."""

    # Running total of matching functions; incremented by the number of
    # matches found for every function that has at least one match.
    cnt_elements: int
    # Matching parts keyed by function; entries whose list of matches is
    # empty are removed while the sources are searched.
    same_parts: SameFuncs
    # For each function that has matches, the highest `percent` among them.
    max_funcs_same_percentages: dict[str, float]


SamePartsOfAll = dict[str, dict[str, Elements]]


def _get_resulting_same_percentages(
same_parts_of_all: SamePartsOfAll,
) -> dict[str, float]:
resulting_same_percentages: dict[str, float] = {}
for first_path, same_works in same_parts_of_all.items():
max_funcs_same_percentages = {}
for second_work in same_works.values():
for function, same_percentage in second_work[
"max_funcs_same_percentages"
].items():
if same_percentage <= max_funcs_same_percentages.get(function, 0):
continue
max_funcs_same_percentages[function] = same_percentage
if not (cnt_functions := len(max_funcs_same_percentages)):
continue
resulting_percentage = round(
sum(max_funcs_same_percentages.values()) / cnt_functions,
2,
)
resulting_same_percentages[first_path] = resulting_percentage
return resulting_same_percentages


def _search_sources(
df: pd.DataFrame, threshold: int = DEFAULT_THRESHOLD
) -> SamePartsOfAll:
same_parts_of_all: SamePartsOfAll = defaultdict(lambda: {})
for line, _, same_parts_of_second, same_parts_of_first in _get_parsed_line(
df, threshold, include_funcs_less_threshold=False
):
same_parts_of_all[line.first_path][line.second_path] = Elements(
cnt_elements=0, same_parts=deepcopy(same_parts_of_second)
)
same_parts_of_all[line.second_path][line.first_path] = Elements(
cnt_elements=0, same_parts=deepcopy(same_parts_of_first)
)
for function, same_functions in same_parts_of_second.items():
cnt_same_functions = len(same_functions)
if cnt_same_functions == 0:
same_parts_of_all[line.first_path][line.second_path]["same_parts"].pop(
function
for first_path, second_path, same_parts in (
(line.first_path, line.second_path, same_parts_of_second),
(line.second_path, line.first_path, same_parts_of_first),
):
element = same_parts_of_all[first_path][second_path] = Elements(
cnt_elements=0,
same_parts=deepcopy(same_parts),
max_funcs_same_percentages={},
)
for function, same_functions in same_parts.items():
if (cnt_same_functions := len(same_functions)) == 0:
element["same_parts"].pop(function)
continue
element["max_funcs_same_percentages"][function] = max(
same_function.percent for same_function in same_functions
)
continue
same_parts_of_all[line.first_path][line.second_path][
"cnt_elements"
] += cnt_same_functions
for function, same_functions in same_parts_of_first.items():
cnt_same_functions = len(same_functions)
if cnt_same_functions == 0:
same_parts_of_all[line.second_path][line.first_path]["same_parts"].pop(
function
)
continue
same_parts_of_all[line.second_path][line.first_path][
"cnt_elements"
] += cnt_same_functions
if same_parts_of_all[line.first_path][line.second_path]["cnt_elements"] == 0:
del same_parts_of_all[line.first_path][line.second_path]
if same_parts_of_all[line.second_path][line.first_path]["cnt_elements"] == 0:
del same_parts_of_all[line.second_path][line.first_path]
same_parts_of_all = {k: v for k, v in same_parts_of_all.items() if v}
return same_parts_of_all
element["cnt_elements"] += cnt_same_functions
if element["cnt_elements"] == 0:
del same_parts_of_all[first_path][second_path]

return {k: v for k, v in same_parts_of_all.items() if v}


def _create_report(
Expand Down Expand Up @@ -259,12 +272,14 @@ def _create_sources_report(
language: Language = DEFAULT_LANGUAGE,
) -> None:
data = _search_sources(read_df(df_path), threshold)
same_percentages = _get_resulting_same_percentages(data)
template = environment.from_string(SOURCES_TEMPLATE_PATH.read_text())
if save_path.is_dir():
save_path = save_path / DEFAULT_SOURCES_REPORT_NAME
save_path.write_text(
template.render(
data=data,
same_percentages=same_percentages,
language=language,
enumerate=enumerate,
Path=Path,
Expand Down
2 changes: 2 additions & 0 deletions src/templates/sources.templ
Original file line number Diff line number Diff line change
Expand Up @@ -46,9 +46,11 @@
<caption>{{ _("General information") }}</caption>
<tr class="table__header">
<th>{{ _("The path to the file being checked") }}</th>
<th>{{ _("Total maximum compliance, %") }}</th>
</tr>
<tr class="table__row">
<td>{{ work_path }}</td>
<td>{{ same_percentages[work_path] }}</td>
</tr>
</table>
<div align="center">
Expand Down
Loading

0 comments on commit d973ad5

Please sign in to comment.