Skip to content

Commit e3b46e6

Browse files
author
ABaldwinHunter
committed
Tune duplication - 3 commits
1. FIX - Use node size as mass, instead of flay score 2. FIX - Report issue for each instance of duplicated code, not just first sexp. 3. UPDATE: Tune Python Remediation Points - Reduce AST threshold from 40 to 32 (classic is 28) - update point formula to match classic computation - don't penalize extra for identical duplication Change from remediations_points = x * score to remediation_points = x + (score-threshold) * y Since remediation points are a function of effort required to fix an issue, we're making a behavioral change to not penalize extra for identical duplication. 4. Clean specs
1 parent 4e3426e commit e3b46e6

File tree

13 files changed

+217
-152
lines changed

13 files changed

+217
-152
lines changed

config/contents/duplicated_code.md.erb

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@ When you violate DRY, bugs and maintenance problems are sure to follow. Duplicat
99
## Issue Mass
1010

1111
Duplicated code has a calculated mass, which can be thought of as a measure of how much logic has been duplicated.
12-
This issue has a mass of `<%= issue.mass %>`: if you would like to change the minimum mass that will be reported as an issue, please see the details in [`codeclimate-duplication`'s documentation](https://github.com/codeclimate/codeclimate-duplication).
12+
This issue has a mass of `<%= mass %>`: if you would like to change the minimum mass that will be reported as an issue, please see the details in [`codeclimate-duplication`'s documentation](https://github.com/codeclimate/codeclimate-duplication).
1313

1414
## Refactorings
1515

lib/cc/engine/analyzers/analyzer_base.rb

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -36,8 +36,8 @@ def mass_threshold
3636
engine_config.mass_threshold_for(self.class::LANGUAGE) || self.class::DEFAULT_MASS_THRESHOLD
3737
end
3838

39-
def calculate_points(issue)
40-
self.class::BASE_POINTS * issue.mass
39+
def calculate_points(mass)
40+
self.class::BASE_POINTS * mass
4141
end
4242

4343
private

lib/cc/engine/analyzers/issue.rb

Lines changed: 89 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,89 @@
1+
require "cc/engine/analyzers/sexp_lines"
2+
require "cc/engine/analyzers/violation_read_up"
3+
require "digest"
4+
5+
module CC
6+
module Engine
7+
module Analyzers
8+
class Issue
9+
def initialize(language_strategy:, check_name:, current_sexp:, other_sexps:)
10+
@language_strategy = language_strategy
11+
@check_name = check_name
12+
@current_sexp = current_sexp
13+
@other_sexps = other_sexps
14+
end
15+
16+
def format # rubocop:disable Metrics/MethodLength
17+
{
18+
"type": "issue",
19+
"check_name": check_name,
20+
"description": description,
21+
"categories": ["Duplication"],
22+
"location": format_location,
23+
"remediation_points": calculate_points,
24+
"other_locations": format_other_locations,
25+
"content": content_body,
26+
"fingerprint": fingerprint,
27+
}
28+
end # rubocop:enable Metrics/MethodLength
29+
30+
def report_name
31+
"#{current_sexp.file}-#{current_sexp.line}"
32+
end
33+
34+
def mass
35+
current_sexp.mass
36+
end
37+
38+
private
39+
40+
attr_reader :language_strategy, :check_name, :other_sexps, :current_sexp
41+
42+
def calculate_points
43+
language_strategy.calculate_points(mass)
44+
end
45+
46+
def format_location
47+
format_sexp(current_sexp)
48+
end
49+
50+
def format_other_locations
51+
other_sexps.map do |sexp|
52+
format_sexp(sexp)
53+
end
54+
end
55+
56+
def format_sexp(sexp)
57+
lines = SexpLines.new(sexp)
58+
{
59+
"path": sexp.file.gsub(%r{^./}, ""),
60+
"lines": {
61+
"begin": lines.begin_line,
62+
"end": lines.end_line,
63+
},
64+
}
65+
end
66+
67+
def content_body
68+
{ "body": ViolationReadUp.new(mass).contents }
69+
end
70+
71+
def fingerprint
72+
digest = Digest::MD5.new
73+
digest << current_sexp.file
74+
digest << "-"
75+
digest << current_sexp.mass.to_s
76+
digest << "-"
77+
digest << check_name
78+
digest.to_s
79+
end
80+
81+
def description
82+
description = "#{check_name} found in #{(other_sexps.length)} other location"
83+
description += "s" if other_sexps.length > 1
84+
description
85+
end
86+
end
87+
end
88+
end
89+
end

lib/cc/engine/analyzers/python/main.rb

Lines changed: 11 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -12,11 +12,20 @@ module Python
1212
class Main < CC::Engine::Analyzers::Base
1313
LANGUAGE = "python"
1414
DEFAULT_PATHS = ["**/*.py"]
15-
DEFAULT_MASS_THRESHOLD = 40
16-
BASE_POINTS = 1000
15+
DEFAULT_MASS_THRESHOLD = 32
16+
BASE_POINTS = 1_500_000
17+
POINTS_PER_OVERAGE = 50_000
18+
19+
def calculate_points(mass)
20+
BASE_POINTS + (overage(mass) * POINTS_PER_OVERAGE)
21+
end
1722

1823
private
1924

25+
def overage(mass)
26+
mass - mass_threshold
27+
end
28+
2029
def process_file(path)
2130
Node.new(::CC::Engine::Analyzers::Python::Parser.new(File.binread(path), path).parse.syntax_tree, path).format
2231
end

lib/cc/engine/analyzers/reporter.rb

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -38,9 +38,11 @@ def report
3838
flay.report(StringIO.new).each do |issue|
3939
violation = new_violation(issue)
4040

41-
unless reports.include?(violation.report_name)
42-
reports.add(violation.report_name)
43-
io.puts "#{violation.format.to_json}\0"
41+
violation.occurrences.each do |occurrence|
42+
unless reports.include?(occurrence.report_name)
43+
reports.add(occurrence.report_name)
44+
io.puts "#{occurrence.format.to_json}\0"
45+
end
4446
end
4547
end
4648
end

lib/cc/engine/analyzers/ruby/main.rb

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -22,14 +22,14 @@ class Main < CC::Engine::Analyzers::Base
2222
POINTS_PER_OVERAGE = 100_000
2323
TIMEOUT = 300
2424

25-
def calculate_points(issue)
26-
BASE_POINTS + (overage(issue) * POINTS_PER_OVERAGE)
25+
def calculate_points(mass)
26+
BASE_POINTS + (overage(mass) * POINTS_PER_OVERAGE)
2727
end
2828

2929
private
3030

31-
def overage(issue)
32-
issue.mass - mass_threshold
31+
def overage(mass)
32+
mass - mass_threshold
3333
end
3434

3535
def process_file(file)

lib/cc/engine/analyzers/violation.rb

Lines changed: 10 additions & 82 deletions
Original file line numberDiff line numberDiff line change
@@ -1,52 +1,24 @@
1-
require "cc/engine/analyzers/sexp_lines"
2-
require "cc/engine/analyzers/violation_read_up"
3-
require "digest"
1+
require "cc/engine/analyzers/issue"
42

53
module CC
64
module Engine
75
module Analyzers
86
class Violation
9-
attr_reader :issue
10-
117
def initialize(language_strategy, issue, hashes)
128
@language_strategy = language_strategy
13-
@issue = issue
149
@hashes = hashes
10+
@issue = issue
1511
end
1612

17-
def format
18-
{
19-
"type": "issue",
20-
"check_name": check_name,
21-
"description": description,
22-
"categories": ["Duplication"],
23-
"location": format_location,
24-
"remediation_points": calculate_points,
25-
"other_locations": format_other_locations,
26-
"content": content_body,
27-
"fingerprint": fingerprint
28-
}
29-
end
30-
31-
def report_name
32-
"#{current_sexp.file}-#{current_sexp.line}"
13+
def occurrences
14+
hashes.map.with_index do |sexp, i|
15+
Issue.new(language_strategy: language_strategy, check_name: check_name, current_sexp: sexp, other_sexps: other_sexps(hashes.dup, i))
16+
end
3317
end
3418

3519
private
3620

37-
attr_reader :language_strategy, :hashes
38-
39-
def current_sexp
40-
@location ||= sorted_hashes.first
41-
end
42-
43-
def sorted_hashes
44-
@_sorted_hashes ||= hashes.sort_by(&:file)
45-
end
46-
47-
def other_sexps
48-
@other_locations ||= sorted_hashes.drop(1)
49-
end
21+
attr_reader :language_strategy, :hashes, :issue
5022

5123
def check_name
5224
if issue.identical?
@@ -56,53 +28,9 @@ def check_name
5628
end
5729
end
5830

59-
def calculate_points
60-
language_strategy.calculate_points(issue)
61-
end
62-
63-
def format_location
64-
format_sexp(current_sexp)
65-
end
66-
67-
def format_other_locations
68-
other_sexps.map do |sexp|
69-
format_sexp(sexp)
70-
end
71-
end
72-
73-
def format_sexp(sexp)
74-
lines = SexpLines.new(sexp)
75-
{
76-
"path": sexp.file.gsub(%r(^./), ""),
77-
"lines": {
78-
"begin": lines.begin_line,
79-
"end": lines.end_line,
80-
},
81-
}
82-
end
83-
84-
def content_body
85-
@_content_body ||= { "body": ViolationReadUp.new(issue).contents }
86-
end
87-
88-
def fingerprint
89-
digest = Digest::MD5.new
90-
digest << current_sexp.file
91-
digest << "-"
92-
digest << current_sexp.mass.to_s
93-
digest << "-"
94-
digest << occurrences.to_s
95-
digest.to_s
96-
end
97-
98-
def description
99-
description = "#{check_name} found in #{occurrences} other location"
100-
description += "s" if occurrences > 1
101-
description
102-
end
103-
104-
def occurrences
105-
other_sexps.count
31+
def other_sexps(members, i)
32+
members.delete_at(i)
33+
members.sort_by(&:file)
10634
end
10735
end
10836
end

lib/cc/engine/analyzers/violation_read_up.rb

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -4,8 +4,8 @@ module CC
44
module Engine
55
module Analyzers
66
class ViolationReadUp
7-
def initialize(issue)
8-
@issue = issue
7+
def initialize(mass)
8+
@mass = mass
99
end
1010

1111
def contents
@@ -14,7 +14,7 @@ def contents
1414

1515
private
1616

17-
attr_reader :issue
17+
attr_reader :mass
1818

1919
TEMPLATE_REL_PATH = "../../../../config/contents/duplicated_code.md.erb"
2020

spec/cc/engine/analyzers/javascript/main_spec.rb

Lines changed: 15 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,8 @@
1+
require 'spec_helper'
12
require 'cc/engine/analyzers/javascript/main'
23
require 'cc/engine/analyzers/reporter'
34
require 'cc/engine/analyzers/engine_config'
45
require 'cc/engine/analyzers/file_list'
5-
require 'flay'
6-
require 'tmpdir'
76

87
RSpec.describe CC::Engine::Analyzers::Javascript::Main, in_tmpdir: true do
98
include AnalyzerSpecHelpers
@@ -16,7 +15,9 @@
1615
console.log("hello JS!");
1716
EOJS
1817

19-
result = run_engine(engine_conf).strip
18+
issues = run_engine(engine_conf).strip.split("\0")
19+
result = issues.first.strip
20+
2021
json = JSON.parse(result)
2122

2223
expect(json["type"]).to eq("issue")
@@ -27,13 +28,13 @@
2728
"path" => "foo.js",
2829
"lines" => { "begin" => 1, "end" => 1 },
2930
})
30-
expect(json["remediation_points"]).to eq(297000)
31+
expect(json["remediation_points"]).to eq(33_000)
3132
expect(json["other_locations"]).to eq([
3233
{"path" => "foo.js", "lines" => { "begin" => 2, "end" => 2} },
3334
{"path" => "foo.js", "lines" => { "begin" => 3, "end" => 3} }
3435
])
35-
expect(json["content"]["body"]).to match /This issue has a mass of `99`/
36-
expect(json["fingerprint"]).to eq("55ae5d0990647ef496e9e0d315f9727d")
36+
expect(json["content"]["body"]).to match /This issue has a mass of `11`/
37+
expect(json["fingerprint"]).to eq("c4d29200c20d02297c6f550ad2c87c15")
3738
end
3839

3940
it "prints an issue for similar code" do
@@ -43,7 +44,9 @@
4344
console.log("helllllllllllllllllo JS!");
4445
EOJS
4546

46-
result = run_engine(engine_conf).strip
47+
issues = run_engine(engine_conf).strip.split("\0")
48+
result = issues.first.strip
49+
4750
json = JSON.parse(result)
4851

4952
expect(json["type"]).to eq("issue")
@@ -54,13 +57,13 @@
5457
"path" => "foo.js",
5558
"lines" => { "begin" => 1, "end" => 1 },
5659
})
57-
expect(json["remediation_points"]).to eq(99000)
60+
expect(json["remediation_points"]).to eq(33_000)
5861
expect(json["other_locations"]).to eq([
5962
{"path" => "foo.js", "lines" => { "begin" => 2, "end" => 2} },
6063
{"path" => "foo.js", "lines" => { "begin" => 3, "end" => 3} }
6164
])
62-
expect(json["content"]["body"]).to match /This issue has a mass of `33`/
63-
expect(json["fingerprint"]).to eq("55ae5d0990647ef496e9e0d315f9727d")
65+
expect(json["content"]["body"]).to match /This issue has a mass of `11`/
66+
expect(json["fingerprint"]).to eq("d9dab8e4607e2a74da3b9eefb49eacec")
6467
end
6568

6669
it "skips unparsable files" do
@@ -91,8 +94,8 @@
9194
<a className='button button-primary full' href='#' onClick={this.onSubmit.bind(this)}>Login</a>
9295
EOJSX
9396

94-
result = run_engine(engine_conf).strip
95-
issues = result.split("\0")
97+
issues = run_engine(engine_conf).strip.split("\0")
98+
9699
expect(issues.length).to eq 1
97100
end
98101

0 commit comments

Comments
 (0)