Skip to content

Commit 4297037

Browse files
authored
Merge pull request #85 from adamtornhill/84-support-custom-temporal-period
Support custom temporal period
2 parents c8ffe12 + 4829229 commit 4297037

File tree

6 files changed

+213
-71
lines changed

6 files changed

+213
-71
lines changed

project.clj

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@
1515
[clj-time "0.9.0"]
1616
[org.clojure/math.numeric-tower "0.0.4"]
1717
[org.clojure/math.combinatorics "0.1.1"]
18+
[medley "1.4.0"]
1819
[semantic-csv "0.2.1-alpha1"]
1920
[instaparse "1.4.1"]]
2021
:main code-maat.cmd-line

src/code_maat/app/app.clj

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -176,9 +176,10 @@
176176
the given temporal period. Allows the user to treat
177177
all commits during one day as a single, logical change set.
178178
NOTE: will probably not work with author's analyses!!!"
179-
[options commits]
180-
(if-let [time-period (:temporal-period options)]
181-
(time-grouper/run commits time-period)
179+
[{:keys [temporal-period] :as options}
180+
commits]
181+
(if temporal-period
182+
(time-grouper/by-time-period commits options)
182183
commits))
183184

184185
(defn- aggregate-authors-in-teams
Lines changed: 115 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,12 @@
11
;;; Copyright (C) 2014 Adam Tornhill
22
;;;
33

4-
(ns code-maat.app.time-based-grouper)
4+
(ns code-maat.app.time-based-grouper
5+
(:require [clj-time.core :as t]
6+
[clj-time.format :as tf]
7+
[clj-time.periodic :as time-period]
8+
[clj-time.core :as tc]
9+
[medley.core :as m]))
510

611
;;; Sometimes we'd like to use a different temporal window than
712
;;; the commit. For example, when multiple teams are involved
@@ -11,30 +16,116 @@
1116
;;; To remove these biases we use this module to re-group all
1217
;;; changes according to a given time window before analysis.
1318
;;;
14-
;;; LIMITATION: At the moment we only support grouping commits that
15-
;;; occour within the same day. This is because I could implement
16-
;;; that aggregation easily. I plan to extend Code Maat with
17-
;;; support for arbitrary temporal windows.
18-
19-
(defn- date-as-commit-id
20-
[commit]
21-
(let [date (:date commit)]
22-
(update-in commit [:rev] (fn [_old] date))))
23-
24-
(defn- throw-on-invalid
25-
[time-period]
26-
(when (not (= "1" time-period)) ; Let's support more in the future...
27-
(throw
28-
(IllegalArgumentException.
29-
(str "Invalid time-period: the current version only supports one (1) day")))))
30-
31-
(defn run
19+
;;; Grouping commits by time involves a sliding window over the
20+
;;; original commits. This means that logically, the same physical commit
21+
;;; can be counted multiple times since it overlaps with several slides
22+
;;; of the window. This works well for change coupling but not hotspots.
23+
;;; Hence, the validation ensures it's a supported analysis before
24+
;;; applying the filter.
25+
26+
(defn- string->date
27+
[s]
28+
(tf/parse (tf/formatters :year-month-day) s))
29+
30+
(defn date->string
31+
[d]
32+
(tf/unparse (tf/formatters :year-month-day) d))
33+
34+
(defn- date-of
35+
[cs]
36+
(some-> cs first :date string->date))
37+
38+
(defn- daily-dates-between
39+
"Create a range of DateTime objects where each date represents one day."
40+
[start end]
41+
(let [feeding-range (time-period/periodic-seq start (tc/days 1))
42+
end-condition-date (tc/plus end (tc/days 1))
43+
full-range? (fn [current-date] (t/before? current-date end-condition-date))]
44+
(take-while full-range? feeding-range)))
45+
46+
(defn- pad-commits-to-complete-time-series
47+
"There are probably many days which don't have any commits.
48+
This function pads those days with empty commit sets. That way, we can
49+
partition over the sequence and easily create the sliding window commit set."
50+
[commits]
51+
(let [commits-ascending (sort-by :date commits)
52+
first-commit-date (date-of commits-ascending)
53+
last-commit-date (date-of (reverse commits-ascending))
54+
commits-on-non-active-days []]
55+
(reduce (fn [acc date-in-range]
56+
(let [as-date (date->string date-in-range)
57+
commits-on-day (get acc as-date commits-on-non-active-days)]
58+
(assoc acc as-date commits-on-day)))
59+
(group-by :date commits)
60+
(daily-dates-between first-commit-date last-commit-date))))
61+
62+
(defn- drop-date-key
63+
"We used group-by to get commits by date. Now, drop the key so that
64+
only the commits remain."
65+
[grouped-commits]
66+
(map second grouped-commits))
67+
68+
(defn- remove-empty-windows
69+
"Not all dates have commit activity."
70+
[commits-within-sliding-windows]
71+
(remove (fn [cs]
72+
(every? empty? cs))
73+
commits-within-sliding-windows))
74+
75+
(defn- adjust-revision-to
76+
"The edge case is that the same file should only be included once, so
77+
let's filter out duplicates."
78+
[new-rev cs]
79+
(->> cs
80+
(map (fn [c]
81+
(assoc c :rev new-rev)))
82+
(m/distinct-by :entity)))
83+
84+
(defn- combine-commits-to-logical-changesets
85+
[commits-within-sliding-windows]
86+
(mapcat (fn [commits-in-window]
87+
(let [cs (reduce (partial into) commits-in-window)
88+
latest-day (->> cs (sort-by :date) reverse first :date)]
89+
(adjust-revision-to latest-day cs)))
90+
commits-within-sliding-windows))
91+
92+
(defn- combine-sliding-commits
93+
"After partitioning commits according to the sliding window, we
94+
need to deliver a flat sequence where each commit group in the window
95+
represents a logical commitset."
96+
[commits-within-sliding-windows]
97+
(->> commits-within-sliding-windows
98+
remove-empty-windows
99+
combine-commits-to-logical-changesets))
100+
101+
(defn- partition-commits-into-sliding-periods-of
102+
[time-period padded-cs]
103+
(->> padded-cs
104+
(sort-by first)
105+
drop-date-key
106+
(partition time-period 1)))
107+
108+
(defn- commits->sliding-window-seq
109+
[time-period cs]
110+
(->> cs
111+
pad-commits-to-complete-time-series
112+
(partition-commits-into-sliding-periods-of time-period)
113+
combine-sliding-commits))
114+
115+
(defn- validated-time-period-from
116+
[{:keys [temporal-period] :as _options}]
117+
(if (re-matches #"\d+" temporal-period)
118+
(int (Double/parseDouble temporal-period))
119+
(throw (IllegalArgumentException.
120+
(str "Invalid time-period: the given value '" temporal-period "' is not an integer.")))))
121+
122+
(defn by-time-period
32123
"Alright, this is a hack: we just set the commit ID to
33124
the latest commit date within the sliding window. That makes the rest of the analyses treat
34125
our faked grouping as belonging to the same change set."
35-
([raw-data]
36-
(run raw-data "1"))
37-
([raw-data time-period]
38-
(throw-on-invalid time-period)
39-
(map date-as-commit-id raw-data)))
126+
[cs options]
127+
(let [time-period (validated-time-period-from options)]
128+
(if (seq cs)
129+
(commits->sliding-window-seq time-period cs)
130+
cs)))
40131

src/code_maat/cmd_line.clj

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -33,7 +33,7 @@
3333
:default 30 :parse-fn #(Integer/parseInt %)]
3434
["-e" "--expression-to-match MATCH-EXPRESSION" "A regex to match against commit messages. Used with -messages analyses"]
3535
["-t" "--temporal-period TEMPORAL-PERIOD"
36-
"Instructs Code Maat to consider all commits during the same day as a single, logical commit"]
36+
"Used for coupling analyses. Instructs Code Maat to consider all commits during the rolling temporal period as a single, logical commit set"]
3737
["-d" "--age-time-now AGE-TIME_NOW" "Specify a date as YYYY-MM-dd that counts as time zero when doing a code age analysis"]
3838
["-h" "--help"]])
3939

Lines changed: 44 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,44 @@
1+
;;; Copyright (C) 2014 Adam Tornhill
2+
;;;
3+
4+
(ns code-maat.app.time-based-end-to-end-test
5+
(:require [code-maat.app.app :as app])
6+
(:use [clojure.test]
7+
[code-maat.tools.test-tools]))
8+
9+
;;; End-to-end tests to simulate a time-based analysis.
10+
;;;
11+
;;; The test data contains two commits done the same day.
12+
;;; With the default options we'll treat them as separate.
13+
;;; In a time-based analysis we consider them as a logical
14+
;;; part of the same work.
15+
16+
(def ^:const log-file "./test/code_maat/app/day_coupled_entities_git.txt")
17+
18+
(def ^:const csv-options
19+
{:version-control "git"
20+
:analysis "coupling"
21+
:min-revs 1
22+
:min-shared-revs 1
23+
:min-coupling 10
24+
:max-coupling 100
25+
:max-changeset-size 10})
26+
27+
(def ^:const csv-options-for-time-based
28+
(merge csv-options {:temporal-period "1"}))
29+
30+
(deftest only-calculates-coupling-within-same-commit-by-default
31+
(is (= (run-with-str-output log-file csv-options)
32+
"entity,coupled,degree,average-revs\n/Infrastrucure/Network/Connection.cs,/Presentation/Status/ClientPresenter.cs,100,1\n")))
33+
34+
(deftest calculates-coupling-within-same-day
35+
(is (= (run-with-str-output log-file csv-options-for-time-based)
36+
"entity,coupled,degree,average-revs\n/Infrastrucure/Network/Connection.cs,/Presentation/Status/ClientPresenter.cs,100,1\n/Infrastrucure/Network/Connection.cs,/Infrastrucure/Network/TcpConnection.cs,100,1\n/Infrastrucure/Network/TcpConnection.cs,/Presentation/Status/ClientPresenter.cs,100,1\n")))
37+
38+
(def ^:const options-with-invalid-time-period
39+
(merge csv-options {:temporal-period "not a number"}))
40+
41+
(deftest throws-on-unsupported-time-periods
42+
"We hope to support more options in the future."
43+
(is (thrown? IllegalArgumentException
44+
(run-with-str-output log-file options-with-invalid-time-period))))
Lines changed: 48 additions & 43 deletions
Original file line numberDiff line numberDiff line change
@@ -1,44 +1,49 @@
1-
;;; Copyright (C) 2014 Adam Tornhill
2-
;;;
3-
41
(ns code-maat.app.time-based-grouper-test
5-
(:require [code-maat.app.app :as app])
6-
(:use [clojure.test]
7-
[code-maat.tools.test-tools]))
8-
9-
;;; End-to-end tests to simulate a time-based analysis.
10-
;;;
11-
;;; The test data contains two commits done the same day.
12-
;;; With the default options we'll treat them as separate.
13-
;;; In a time-base analysis we consider them as a logical
14-
;;; part of the same work.
15-
16-
(def ^:const log-file "./test/code_maat/app/day_coupled_entities_git.txt")
17-
18-
(def ^:const csv-options
19-
{:version-control "git"
20-
:analysis "coupling"
21-
:min-revs 1
22-
:min-shared-revs 1
23-
:min-coupling 10
24-
:max-coupling 100
25-
:max-changeset-size 10})
26-
27-
(def ^:const csv-options-for-time-based
28-
(merge csv-options {:temporal-period "1"}))
29-
30-
(deftest only-calculates-coupling-within-same-commit-by-default
31-
(is (= (run-with-str-output log-file csv-options)
32-
"entity,coupled,degree,average-revs\n/Infrastrucure/Network/Connection.cs,/Presentation/Status/ClientPresenter.cs,100,1\n")))
33-
34-
(deftest calculates-coupling-within-same-day
35-
(is (= (run-with-str-output log-file csv-options-for-time-based)
36-
"entity,coupled,degree,average-revs\n/Infrastrucure/Network/Connection.cs,/Presentation/Status/ClientPresenter.cs,100,1\n/Infrastrucure/Network/Connection.cs,/Infrastrucure/Network/TcpConnection.cs,100,1\n/Infrastrucure/Network/TcpConnection.cs,/Presentation/Status/ClientPresenter.cs,100,1\n")))
37-
38-
(def ^:const options-with-invalid-time-period
39-
(merge csv-options {:temporal-period "2"}))
40-
41-
(deftest throws-on-unsupported-time-periods
42-
"We hope to support more options in the future."
43-
(is (thrown? IllegalArgumentException
44-
(run-with-str-output log-file options-with-invalid-time-period))))
2+
(:require [code-maat.app.time-based-grouper :as grouper])
3+
(:use [clojure.test]))
4+
5+
(deftest commits-by-day
6+
(testing "Expect a non-modifying operation"
7+
(let [input-commits [{:entity "A" :rev 1 :date "2022-10-20"}
8+
{:entity "B" :rev 2 :date "2022-10-20"}]]
9+
(is (= [{:date "2022-10-20" :entity "A" :rev "2022-10-20"}
10+
{:date "2022-10-20" :entity "B" :rev "2022-10-20"}]
11+
(grouper/by-time-period input-commits {:temporal-period "1"}))))))
12+
13+
(deftest multiple-days-give-a-rolling-dataset
14+
(let [input-commits [{:entity "A" :rev 1 :date "2022-10-20"}
15+
{:entity "B" :rev 2 :date "2022-10-20"}
16+
17+
{:entity "B" :rev 3 :date "2022-10-19"} ; double entry, two B's when looking at last two days
18+
{:entity "D" :rev 3 :date "2022-10-19"}
19+
20+
{:entity "C" :rev 4 :date "2022-10-18"}
21+
{:entity "D" :rev 4 :date "2022-10-18"}
22+
23+
{:entity "D" :rev 5 :date "2022-10-15"}]] ; a gap in days between the commits
24+
(is (= [
25+
; Only commits on 2022-10-15, not on subsequent day:
26+
{:date "2022-10-15" :entity "D" :rev "2022-10-15"}
27+
28+
; 17-18th
29+
{:date "2022-10-18" :entity "C" :rev "2022-10-18"}
30+
{:date "2022-10-18" :entity "D" :rev "2022-10-18"}
31+
32+
; 18-19th
33+
{:date "2022-10-18" :entity "C" :rev "2022-10-19"}
34+
{:date "2022-10-18" :entity "D" :rev "2022-10-19"}
35+
{:date "2022-10-19" :entity "B" :rev "2022-10-19"}
36+
37+
; 19-20th
38+
{:date "2022-10-19" :entity "B" :rev "2022-10-20"}
39+
{:date "2022-10-19" :entity "D" :rev "2022-10-20"}
40+
{:date "2022-10-20" :entity "A" :rev "2022-10-20"}]
41+
(grouper/by-time-period input-commits {:temporal-period "2"})))))
42+
43+
(deftest edge-cases
44+
(testing "Works on an empty input sequence, ie. no commits"
45+
(is (= []
46+
(grouper/by-time-period [] {:temporal-period "2"}))))
47+
(testing "Works on a single commit"
48+
(is (= [{:date "2022-10-19" :entity "B" :rev "2022-10-19"}]
49+
(grouper/by-time-period [{:entity "B" :rev 3 :date "2022-10-19"}] {:temporal-period "1"})))))

0 commit comments

Comments
 (0)