Skip to content

Commit 076bd80

Browse files
committed
gopls/internal/filewatcher: retry directory reading upon failure
Previously, file watcher used filepath.WalkDir to walk the tree but filepath.WalkDir only read a given dir once. If an error happened, the filepath.WalkDir will give up on that dir without any retry. Instead, file watcher will retry read dir if the error returned is considered as IsEphemeralError. Otherwise, call error handler with the returned error and move on to the next entry in the tree. Fix golang/go#74820 Change-Id: If13f8eeaaad8e3123083c3bf0eee3bb688e80faa Reviewed-on: https://go-review.googlesource.com/c/tools/+/719320 Reviewed-by: Robert Findley <rfindley@google.com> LUCI-TryBot-Result: Go LUCI <golang-scoped@luci-project-accounts.iam.gserviceaccount.com>
1 parent 605803f commit 076bd80

File tree

1 file changed

+117
-22
lines changed

1 file changed

+117
-22
lines changed

gopls/internal/filewatcher/filewatcher.go

Lines changed: 117 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@ import (
1616

1717
"github.com/fsnotify/fsnotify"
1818
"golang.org/x/tools/gopls/internal/protocol"
19+
"golang.org/x/tools/internal/robustio"
1920
)
2021

2122
// ErrClosed is used when trying to operate on a closed Watcher.
@@ -186,42 +187,34 @@ func (w *Watcher) process(errHandler func(error)) {
186187
if isDir {
187188
switch e.Type {
188189
case protocol.Created:
189-
// Walks the entire directory tree, synthesizes create events for its contents,
190-
// and establishes watches for subdirectories. This recursive, pre-order
191-
// traversal using filepath.WalkDir guarantees a logical event sequence:
192-
// parent directory creation events always precede those of their children.
190+
// Walks the entire directory tree, synthesizes create
191+
// events for its contents, and establishes watches for
192+
// subdirectories. This recursive, pre-order traversal
193+
// guarantees a logical event sequence: parent directory
194+
// creation events always precede those of their children.
193195
//
194-
// For example, consider a creation event for directory a, and suppose
195-
// a has contents [a/b, a/b/c, a/c, a/c/d]. The effective events will be:
196+
// For example, consider a creation event for directory
197+
// a, and suppose a has contents [a/b, a/b/c, a/c, a/c/d].
198+
// The effective events will be:
196199
//
197200
// CREATE a
198201
// CREATE a/b
199202
// CREATE a/b/c
200203
// CREATE a/c
201204
// CREATE a/c/d
202-
filepath.WalkDir(event.Name, func(path string, d fs.DirEntry, err error) error {
203-
if d.IsDir() && skipDir(d.Name()) {
204-
return filepath.SkipDir
205-
}
206-
if !d.IsDir() && skipFile(d.Name()) {
207-
return nil
208-
}
209-
210-
if path != event.Name { // avoid duplicate create event for root
205+
w.walkDirWithRetry(event.Name, errHandler, func(path string, isDir bool) error {
206+
if path != event.Name {
211207
synthesized = append(synthesized, protocol.FileEvent{
212208
URI: protocol.URIFromPath(path),
213209
Type: protocol.Created,
214210
})
215211
}
216212

217-
if d.IsDir() {
218-
if err := w.watchDir(path); err != nil {
219-
errHandler(err)
220-
return filepath.SkipDir
221-
}
213+
if isDir {
214+
return w.watchDir(path)
215+
} else {
216+
return nil
222217
}
223-
224-
return nil
225218
})
226219

227220
case protocol.Deleted:
@@ -442,3 +435,105 @@ func (w *Watcher) Close() error {
442435

443436
return err
444437
}
438+
439+
// walkDir calls fn against current path and recursively descends path for each
440+
// file or directory of our interest.
441+
func (w *Watcher) walkDir(path string, isDir bool, errHandler func(error), fn func(path string, isDir bool) error) {
442+
if err := fn(path, isDir); err != nil {
443+
errHandler(err)
444+
return
445+
}
446+
447+
if !isDir {
448+
return
449+
}
450+
451+
entries, err := tryFSOperation(w.stop, func() ([]fs.DirEntry, error) {
452+
// ReadDir may fail due because other processes may be actively
453+
// modifying the watched dir see golang/go#74820.
454+
// TODO(hxjiang): consider adding robustio.ReadDir.
455+
return os.ReadDir(path)
456+
})
457+
if err != nil {
458+
if err != ErrClosed {
459+
errHandler(err)
460+
}
461+
return
462+
}
463+
464+
for _, e := range entries {
465+
if e.IsDir() && skipDir(e.Name()) {
466+
continue
467+
}
468+
if !e.IsDir() && skipFile(e.Name()) {
469+
continue
470+
}
471+
472+
w.walkDir(filepath.Join(path, e.Name()), e.IsDir(), errHandler, fn)
473+
}
474+
}
475+
476+
// walkDirWithRetry walks the file tree rooted at root, calling fn for each
477+
// file or directory of our interest in the tree, including root.
478+
//
479+
// All errors that arise visiting directories or files will be reported to the
480+
// provided error handler function. If an error is encountered visiting a
481+
// directory, that entire subtree will be skipped.
482+
//
483+
// walkDirWithRetry does not follow symbolic links.
484+
//
485+
// It is used instead of [filepath.WalkDir] because it provides control over
486+
// retry behavior when reading a directory fails. If [os.ReadDir] fails with an
487+
// ephemeral error, it is retried multiple times with exponential backoff.
488+
//
489+
// TODO(hxjiang): call walkDirWithRetry in WalkDir.
490+
func (w *Watcher) walkDirWithRetry(root string, errHandler func(error), fn func(path string, isDir bool) error) {
491+
info, err := tryFSOperation(w.stop, func() (os.FileInfo, error) {
492+
return os.Lstat(root) // [os.Lstat] does not follow symlink.
493+
})
494+
if err != nil {
495+
if err != ErrClosed {
496+
errHandler(err)
497+
}
498+
return
499+
}
500+
501+
w.walkDir(root, info.IsDir(), errHandler, fn)
502+
}
503+
504+
// tryFSOperation executes a function `op` with retry logic, making it resilient
505+
// to transient errors. It attempts the operation up to 5 times with exponential
506+
// backoff. Retries occur only if the error is ephemeral.
507+
//
508+
// The operation can be interrupted by closing the `stop` channel, in which case
509+
// it returns [ErrClosed].
510+
func tryFSOperation[Result any](stop <-chan struct{}, op func() (Result, error)) (Result, error) {
511+
var (
512+
delay = 50 * time.Millisecond
513+
err error
514+
)
515+
516+
for i := range 5 {
517+
if i > 0 {
518+
select {
519+
case <-time.After(delay):
520+
delay *= 2
521+
case <-stop:
522+
var zero Result
523+
return zero, ErrClosed
524+
}
525+
}
526+
527+
var res Result
528+
res, err = op()
529+
530+
if robustio.IsEphemeralError(err) {
531+
continue
532+
} else {
533+
return res, err
534+
}
535+
}
536+
537+
var zero Result
538+
return zero, err // return last error encountered
539+
}

0 commit comments

Comments
 (0)