|
1 | 1 | ;;; Copyright (C) 2014 Adam Tornhill
|
2 | 2 | ;;;
|
3 | 3 |
|
4 |
| -(ns code-maat.app.time-based-grouper) |
| 4 | +(ns code-maat.app.time-based-grouper |
| 5 | + (:require [clj-time.core :as t] |
| 6 | + [clj-time.format :as tf] |
| 7 | + [clj-time.periodic :as time-period] |
| 8 | + [clj-time.core :as tc] |
| 9 | + [medley.core :as m])) |
5 | 10 |
|
6 | 11 | ;;; Sometimes we'd like to use a different temporal window than
|
7 | 12 | ;;; the commit. For example, when multiple teams are involved
|
|
11 | 16 | ;;; To remove these biases we use this module to re-group all
|
12 | 17 | ;;; changes according to a given time window before analysis.
|
13 | 18 | ;;;
|
14 |
| -;;; LIMITATION: At the moment we only support grouping commits that |
15 |
| -;;; occour within the same day. This is because I could implement |
16 |
| -;;; that aggregation easily. I plan to extend Code Maat with |
17 |
| -;;; support for arbitrary temporal windows. |
18 |
| - |
19 |
| -(defn- date-as-commit-id |
20 |
| - [commit] |
21 |
| - (let [date (:date commit)] |
22 |
| - (update-in commit [:rev] (fn [_old] date)))) |
23 |
| - |
24 |
| -(defn- throw-on-invalid |
25 |
| - [time-period] |
26 |
| - (when (not (= "1" time-period)) ; Let's support more in the future... |
27 |
| - (throw |
28 |
| - (IllegalArgumentException. |
29 |
| - (str "Invalid time-period: the current version only supports one (1) day"))))) |
30 |
| - |
31 |
| -(defn run |
| 19 | +;;; Grouping commits by time involves a sliding window over the |
| 20 | +;;; original commits. This means that logically, the same physical commit |
| 21 | +;;; can be counted multiple times since it overlaps with several slides |
| 22 | +;;; of the window. This works well for change coupling but not hotspots. |
| 23 | +;;; Hence, the validation ensures it's a supported analysis before |
| 24 | +;;; applying the filter. |
| 25 | + |
(defn- string->date
  "Parses the string s with clj-time's :year-month-day formatter and
  returns the resulting date value."
  [s]
  (let [year-month-day (tf/formatters :year-month-day)]
    (tf/parse year-month-day s)))
| 29 | + |
(defn date->string
  "Renders the date d as a string using clj-time's :year-month-day
  formatter. This is the inverse of the parsing done when reading the
  :date of a commit."
  [d]
  (let [year-month-day (tf/formatters :year-month-day)]
    (tf/unparse year-month-day d)))
| 33 | + |
(defn- date-of
  "Returns the parsed :date of the first commit in cs, or nil when cs is
  nil/empty or the first commit carries no :date."
  [cs]
  (let [raw-date (:date (first cs))]
    ;; Only a nil date short-circuits; anything else is handed to the parser.
    (when (some? raw-date)
      (string->date raw-date))))
| 37 | + |
(defn- daily-dates-between
  "Create a lazy range of date objects, one day apart, beginning at start.
  The range contains every generated date that lies strictly before the
  day after end."
  [start end]
  (let [one-day (tc/days 1)
        day-after-end (tc/plus end one-day)
        within-range? #(t/before? % day-after-end)]
    ;; periodic-seq is infinite, so take-while provides the upper bound.
    (take-while within-range? (time-period/periodic-seq start one-day))))
| 45 | + |
(defn- pad-commits-to-complete-time-series
  "There are probably many days which don't have any commits.
  This function groups the commits by their :date and then adds an empty
  commit set for every day between the first and the last commit date that
  has no commits of its own. That way, we can partition over the result and
  easily create the sliding window commit set.
  Returns a map from date string to a vector of commits."
  [commits]
  (let [by-ascending-date (sort-by :date commits)
        earliest (date-of by-ascending-date)
        latest (date-of (reverse by-ascending-date))
        days-in-range (map date->string (daily-dates-between earliest latest))]
    (reduce (fn [commits-by-day day]
              ;; Days with real commits are left untouched.
              (if (contains? commits-by-day day)
                commits-by-day
                (assoc commits-by-day day [])))
            (group-by :date commits)
            days-in-range)))
| 61 | + |
(defn- drop-date-key
  "The commits arrive as [date commits] pairs since they were grouped by
  date. Strip the date so that only the commits of each day remain."
  [grouped-commits]
  (map (fn [[_date commits-on-day]] commits-on-day)
       grouped-commits))
| 67 | + |
(defn- remove-empty-windows
  "Not all dates have commit activity. Keep only the windows in which
  at least one day has commits."
  [commits-within-sliding-windows]
  (letfn [(any-activity? [days-in-window]
            (some seq days-in-window))]
    (filter any-activity? commits-within-sliding-windows)))
| 74 | + |
(defn- adjust-revision-to
  "Stamps every commit in cs with new-rev as its :rev. The same file
  should only be included once per revision, so only the first commit
  for each :entity is kept."
  [new-rev cs]
  (let [with-new-rev (map #(assoc % :rev new-rev) cs)]
    (m/distinct-by :entity with-new-rev)))
| 83 | + |
(defn- combine-commits-to-logical-changesets
  "Turns each sliding window (a sequence of per-day commit collections)
  into one logical change set: the commits of all days in the window are
  merged and given the latest :date in the window as their revision.
  Returns the change sets of all windows as one flat sequence."
  [commits-within-sliding-windows]
  (letfn [(window->changeset [days-in-window]
            (let [all-commits (reduce into days-in-window)
                  most-recent-date (last (sort (map :date all-commits)))]
              (adjust-revision-to most-recent-date all-commits)))]
    (mapcat window->changeset commits-within-sliding-windows)))
| 91 | + |
(defn- combine-sliding-commits
  "Takes the commits as partitioned by the sliding window and delivers a
  flat sequence where each commit group in a window represents a logical
  commit set. Windows without any commits are discarded first."
  [commits-within-sliding-windows]
  (let [active-windows (remove-empty-windows commits-within-sliding-windows)]
    (combine-commits-to-logical-changesets active-windows)))
| 100 | + |
(defn- partition-commits-into-sliding-periods-of
  "Orders the padded [date commits] pairs by date, strips the dates and
  slides a window of time-period consecutive days over the result,
  advancing one day at a time."
  [time-period padded-cs]
  (let [commits-per-day (drop-date-key (sort-by first padded-cs))]
    (partition time-period 1 commits-per-day)))
| 107 | + |
(defn- commits->sliding-window-seq
  "Runs the complete regrouping pipeline on the commits cs: pad the days
  without commits, partition the days into sliding windows of time-period
  days and combine each window into a logical change set."
  [time-period cs]
  (let [padded (pad-commits-to-complete-time-series cs)
        windows (partition-commits-into-sliding-periods-of time-period padded)]
    (combine-sliding-commits windows)))
| 114 | + |
(defn- validated-time-period-from
  "Extracts :temporal-period from the options and returns it as a
  positive integer (the number of days in the sliding window).

  The value must consist of decimal digits only. Throws an
  IllegalArgumentException when the value is missing, is not an integer,
  does not fit in an int or is zero."
  [{:keys [temporal-period] :as _options}]
  (letfn [(reject [reason]
            (throw (IllegalArgumentException.
                    (str "Invalid time-period: the given value '" temporal-period "' " reason))))]
    ;; some->> guards against a missing option, which would otherwise
    ;; surface as a NullPointerException from re-matches.
    (let [digits (some->> temporal-period str (re-matches #"\d+"))]
      (if-not digits
        (reject "is not an integer.")
        (let [days (try
                     (Integer/parseInt digits)
                     (catch NumberFormatException _
                       (reject "is too large.")))]
          ;; A window of zero days partitions into empty windows only,
          ;; which silently produces an empty analysis; reject it instead.
          (if (pos? days)
            days
            (reject "must be at least 1.")))))))
| 121 | + |
(defn by-time-period
  "Regroups the commits cs into logical change sets by sliding a window
  over the commit days. The window length in days is read from
  :temporal-period in options.
  All commits within a window get the latest commit date of that window
  as their revision. That makes the rest of the analyses treat our
  grouping as belonging to the same change set.
  The period is validated before anything else, so an invalid period
  throws an IllegalArgumentException even when cs is empty. An empty cs
  is returned as is."
  [cs options]
  (let [time-period (validated-time-period-from options)]
    (if (empty? cs)
      cs
      (commits->sliding-window-seq time-period cs))))
40 | 131 |
|
0 commit comments