forked from dlclark/regexp2
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Speed up matches in the presence of timeouts.
Currently regexp2 can be quite slow when a MatchTimeout is supplied (a micro-benchmark shows 3000ns compared to 45ns when no timeout is supplied). This slowdown is caused by repeated timeout checks which call time.Now(). The new approach introduces a fast but approximate clock that is just an atomic variable updated by a goroutine once every 100ms. The new timeout check just compares this variable to the precomputed deadline. Removed "timeout check skip" mechanism since a timeout check is now very cheap. Added a simple micro-benchmark that compares the speed of searching 100 byte text with and without a timeout. Performance impact: 1. A micro-benchmark that looks for an "easy" regexp in a 100 byte string goes from ~3000ns to ~45ns. 2. Chroma (syntax highlighter) speeds up from ~500ms to ~50ms on a 24KB source file. 3. A background CPU load of ~0.15% is present until the end of all match deadlines (even for matches that have finished).
- Loading branch information
Showing
7 changed files
with
273 additions
and
38 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,104 @@ | ||
package regexp2 | ||
|
||
import ( | ||
"sync" | ||
"sync/atomic" | ||
"time" | ||
) | ||
|
||
// fasttime holds a time value (ticks since clock initialization) | ||
type fasttime int64 | ||
|
||
// fastclock provides a fast clock implementation. | ||
// | ||
// A background goroutine periodically stores the current time | ||
// into an atomic variable. | ||
// | ||
// A deadline can be quickly checked for expiration by comparing | ||
// its value to the clock stored in the atomic variable. | ||
// | ||
// The goroutine automatically stops once clockEnd is reached. | ||
// (clockEnd covers the largest deadline seen so far + some | ||
// extra time). This ensures that if regexp2 with timeouts | ||
// stops being used we will stop background work. | ||
type fastclock struct { | ||
// current and clockEnd can be read via atomic loads. | ||
// Reads and writes of other fields require mu to be held. | ||
// runClock and extendClock also hold mu while they write | ||
// current and clockEnd respectively. | ||
mu sync.Mutex | ||
|
||
start time.Time // Time corresponding to fasttime(0) | ||
current atomicTime // Current time (approximate) | ||
clockEnd atomicTime // When clock updater is supposed to stop (>= any existing deadline) | ||
running bool // Is a clock updater running? | ||
} | ||
|
||
var fast fastclock | ||
|
||
// reached returns true if current time is at or past t. | ||
func (t fasttime) reached() bool { | ||
return fast.current.read() >= t | ||
} | ||
|
||
// makeDeadline returns a time that is approximately time.Now().Add(d) | ||
func makeDeadline(d time.Duration) fasttime { | ||
// Increase the deadline since the clock we are reading may be | ||
// just about to tick forwards. | ||
end := fast.current.read() + durationToTicks(d+clockPeriod) | ||
|
||
// Start or extend clock if necessary. | ||
if end > fast.clockEnd.read() { | ||
extendClock(end) | ||
} | ||
return end | ||
} | ||
|
||
// extendClock ensures that clock is live and will run until at least end. | ||
func extendClock(end fasttime) { | ||
fast.mu.Lock() | ||
defer fast.mu.Unlock() | ||
|
||
if fast.start.IsZero() { | ||
fast.start = time.Now() | ||
} | ||
|
||
// Extend the running time to cover end as well as a bit of slop. | ||
if shutdown := end + durationToTicks(time.Second); shutdown > fast.clockEnd.read() { | ||
fast.clockEnd.write(shutdown) | ||
} | ||
|
||
// Start clock if necessary | ||
if !fast.running { | ||
fast.running = true | ||
go runClock() | ||
} | ||
} | ||
|
||
func durationToTicks(d time.Duration) fasttime { | ||
// Downscale nanoseconds to approximately a millisecond so that we can avoid | ||
// overflow even if the caller passes in math.MaxInt64. | ||
return fasttime(d) >> 20 | ||
} | ||
|
||
// clockPeriod is the approximate interval between updates of fast.current. | ||
const clockPeriod = 100 * time.Millisecond | ||
|
||
func runClock() { | ||
fast.mu.Lock() | ||
defer fast.mu.Unlock() | ||
|
||
for fast.current.read() <= fast.clockEnd.read() { | ||
// Unlock while sleeping. | ||
fast.mu.Unlock() | ||
time.Sleep(clockPeriod) | ||
fast.mu.Lock() | ||
|
||
newTime := durationToTicks(time.Since(fast.start)) | ||
fast.current.write(newTime) | ||
} | ||
fast.running = false | ||
} | ||
|
||
type atomicTime struct{ v int64 } // Should change to atomic.Int64 when we can use go 1.19 | ||
|
||
func (t *atomicTime) read() fasttime { return fasttime(atomic.LoadInt64(&t.v)) } | ||
func (t *atomicTime) write(v fasttime) { atomic.StoreInt64(&t.v, int64(v)) } |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,34 @@ | ||
package regexp2 | ||
|
||
import ( | ||
"fmt" | ||
"testing" | ||
"time" | ||
) | ||
|
||
func TestDeadline(t *testing.T) { | ||
for _, delay := range []time.Duration{ | ||
clockPeriod / 10, | ||
clockPeriod, | ||
clockPeriod * 5, | ||
clockPeriod * 10, | ||
} { | ||
delay := delay // Make copy for parallel sub-test. | ||
t.Run(fmt.Sprint(delay), func(t *testing.T) { | ||
t.Parallel() | ||
start := time.Now() | ||
d := makeDeadline(delay) | ||
if d.reached() { | ||
t.Fatalf("deadline (%v) unexpectedly expired immediately", delay) | ||
} | ||
time.Sleep(delay / 2) | ||
if d.reached() { | ||
t.Fatalf("deadline (%v) expired too soon (after %v)", delay, time.Since(start)) | ||
} | ||
time.Sleep(delay/2 + 2*clockPeriod) // Give clock time to tick | ||
if !d.reached() { | ||
t.Fatalf("deadline (%v) did not expire within %v", delay, time.Since(start)) | ||
} | ||
}) | ||
} | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters