Skip to content

Commit

Permalink
gopls: fix windows file corruption
Browse files Browse the repository at this point in the history
Fix the bug that gopls finds the wrong content when formatting an open
URI whose spelling does not match the spelling on disk (i.e. because of
case insensitivity).

Remove the whole View.filesByBase mechanism: it is problematic as we
can't generally know whether or not we want to associate two different
spellings of the same file: for the purposes of finding packages we may
want to treat Foo.go as foo.go, but we don't want to treat a symlink of
foo.go in another directory the same.

Instead, use robustio.FileID to de-duplicate content in the cache, and
otherwise treat URIs as we receive them. This fixes the formatting
corruption, but means that we don't find packages for the corresponding
file (because go/packages.Load("file=foo.go") fails).  A failing test is
added for the latter bug.

Also: use a seenFiles map in the view to satisfy the concern of tracking
relevant files, with a TODO to delete this problematic map.

Along the way, refactor somewhat to separate and normalize the
implementations of source.FileSource.

For golang/go#57081

Change-Id: I02971a1702f057b644fa18a873790e8f0d98a323
Reviewed-on: https://go-review.googlesource.com/c/tools/+/462819
gopls-CI: kokoro <noreply+kokoro@google.com>
Run-TryBot: Robert Findley <rfindley@google.com>
TryBot-Result: Gopher Robot <gobot@golang.org>
Reviewed-by: Alan Donovan <adonovan@google.com>
  • Loading branch information
findleyr committed Jan 30, 2023
1 parent 6f65213 commit ae242ec
Show file tree
Hide file tree
Showing 12 changed files with 429 additions and 325 deletions.
122 changes: 13 additions & 109 deletions gopls/internal/lsp/cache/cache.go
Original file line number Diff line number Diff line change
Expand Up @@ -11,20 +11,16 @@ import (
"go/token"
"go/types"
"html/template"
"os"
"reflect"
"sort"
"strconv"
"sync"
"sync/atomic"
"time"

"golang.org/x/tools/gopls/internal/lsp/source"
"golang.org/x/tools/gopls/internal/span"
"golang.org/x/tools/internal/event"
"golang.org/x/tools/internal/event/tag"
"golang.org/x/tools/internal/gocommand"
"golang.org/x/tools/internal/memoize"
"golang.org/x/tools/internal/robustio"
)

// New creates a new cache for gopls operation results, using the given file
Expand All @@ -47,124 +43,32 @@ func New(fset *token.FileSet, store *memoize.Store) *Cache {
}

c := &Cache{
id: strconv.FormatInt(index, 10),
fset: fset,
store: store,
fileContent: map[span.URI]*DiskFile{},
id: strconv.FormatInt(index, 10),
fset: fset,
store: store,
memoizedFS: &memoizedFS{filesByID: map[robustio.FileID][]*DiskFile{}},
}
return c
}

// A Cache holds caching stores that are bundled together for consistency.
//
// TODO(rfindley): once fset and store need not be bundled together, the Cache
// type can be eliminated.
type Cache struct {
id string
fset *token.FileSet

store *memoize.Store

fileMu sync.Mutex
fileContent map[span.URI]*DiskFile
}

// A DiskFile is a file on the filesystem, or a failure to read one.
// It implements the source.FileHandle interface.
type DiskFile struct {
uri span.URI
modTime time.Time
content []byte
hash source.Hash
err error
}

func (h *DiskFile) URI() span.URI { return h.uri }

func (h *DiskFile) FileIdentity() source.FileIdentity {
return source.FileIdentity{
URI: h.uri,
Hash: h.hash,
}
}

func (h *DiskFile) Saved() bool { return true }
func (h *DiskFile) Version() int32 { return 0 }

func (h *DiskFile) Read() ([]byte, error) {
return h.content, h.err
}

// GetFile stats and (maybe) reads the file, updates the cache, and returns it.
func (c *Cache) GetFile(ctx context.Context, uri span.URI) (source.FileHandle, error) {
fi, statErr := os.Stat(uri.Filename())
if statErr != nil {
return &DiskFile{
err: statErr,
uri: uri,
}, nil
}

// We check if the file has changed by comparing modification times. Notably,
// this is an imperfect heuristic as various systems have low resolution
// mtimes (as much as 1s on WSL or s390x builders), so we only cache
// filehandles if mtime is old enough to be reliable, meaning that we don't
// expect a subsequent write to have the same mtime.
//
// The coarsest mtime precision we've seen in practice is 1s, so consider
// mtime to be unreliable if it is less than 2s old. Capture this before
// doing anything else.
recentlyModified := time.Since(fi.ModTime()) < 2*time.Second

c.fileMu.Lock()
fh, ok := c.fileContent[uri]
c.fileMu.Unlock()

if ok && fh.modTime.Equal(fi.ModTime()) {
return fh, nil
}

fh, err := readFile(ctx, uri, fi) // ~25us
if err != nil {
return nil, err
}
c.fileMu.Lock()
if !recentlyModified {
c.fileContent[uri] = fh
} else {
delete(c.fileContent, uri)
}
c.fileMu.Unlock()
return fh, nil
}

// ioLimit limits the number of parallel file reads per process.
var ioLimit = make(chan struct{}, 128)

func readFile(ctx context.Context, uri span.URI, fi os.FileInfo) (*DiskFile, error) {
select {
case ioLimit <- struct{}{}:
case <-ctx.Done():
return nil, ctx.Err()
}
defer func() { <-ioLimit }()

ctx, done := event.Start(ctx, "cache.readFile", tag.File.Of(uri.Filename()))
_ = ctx
defer done()

content, err := os.ReadFile(uri.Filename()) // ~20us
if err != nil {
content = nil // just in case
}
return &DiskFile{
modTime: fi.ModTime(),
uri: uri,
content: content,
hash: source.HashOf(content),
err: err,
}, nil
*memoizedFS // implements source.FileSource
}

// NewSession creates a new gopls session with the given cache and options overrides.
//
// The provided optionsOverrides may be nil.
//
// TODO(rfindley): move this to session.go.
func NewSession(ctx context.Context, c *Cache, optionsOverrides func(*source.Options)) *Session {
index := atomic.AddInt64(&sessionIndex, 1)
options := source.DefaultOptions().Clone()
Expand All @@ -176,7 +80,7 @@ func NewSession(ctx context.Context, c *Cache, optionsOverrides func(*source.Opt
cache: c,
gocmdRunner: &gocommand.Runner{},
options: options,
overlays: make(map[span.URI]*Overlay),
overlayFS: newOverlayFS(c),
}
event.Log(ctx, "New session", KeyCreateSession.Of(s))
return s
Expand Down
149 changes: 149 additions & 0 deletions gopls/internal/lsp/cache/fs_memoized.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,149 @@
// Copyright 2023 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

package cache

import (
"context"
"os"
"sync"
"time"

"golang.org/x/tools/gopls/internal/lsp/source"
"golang.org/x/tools/gopls/internal/span"
"golang.org/x/tools/internal/event"
"golang.org/x/tools/internal/event/tag"
"golang.org/x/tools/internal/robustio"
)

// A memoizedFS is a file source that memoizes reads, to reduce IO.
type memoizedFS struct {
	mu sync.Mutex // mu guards filesByID

	// filesByID maps existing file inodes to the result of a read.
	// (The read may have failed, e.g. due to EACCES or a delete between stat+read.)
	// Each slice is a non-empty list of aliases: different URIs.
	// All aliases in a slice share the same modTime and content; only the URI differs.
	filesByID map[robustio.FileID][]*DiskFile
}

// newMemoizedFS returns an empty memoizedFS, ready to serve reads.
func newMemoizedFS() *memoizedFS {
	fs := new(memoizedFS)
	fs.filesByID = make(map[robustio.FileID][]*DiskFile)
	return fs
}

// A DiskFile is a file on the filesystem, or a failure to read one.
// It implements the source.FileHandle interface.
type DiskFile struct {
	uri     span.URI    // the URI with which the file was requested
	modTime time.Time   // mtime observed at read time; zero if the stat failed
	content []byte      // file content; nil if the read failed
	hash    source.Hash // hash of content (hash of nil content if the read failed)
	err     error       // non-nil if the stat or read failed
}

// URI returns the URI with which this file was requested.
func (h *DiskFile) URI() span.URI { return h.uri }

// FileIdentity returns the identity (URI + content hash) of the file.
func (h *DiskFile) FileIdentity() source.FileIdentity {
	return source.FileIdentity{
		URI:  h.uri,
		Hash: h.hash,
	}
}

// Saved reports whether the handle matches disk: always true for a DiskFile.
func (h *DiskFile) Saved() bool { return true }

// Version returns the document version: always 0, as a DiskFile is not an editor buffer.
func (h *DiskFile) Version() int32 { return 0 }

// Read returns the file content, or the error encountered reading it.
func (h *DiskFile) Read() ([]byte, error) { return h.content, h.err }

// GetFile stats and (maybe) reads the file, updates the cache, and returns it.
//
// Files are de-duplicated by robustio.FileID rather than by URI, so two
// spellings of the same file (e.g. differing only in case on a
// case-insensitive filesystem) share one cached read. A non-nil FileHandle is
// returned even when the stat fails; the error is reported via its Read/err.
func (fs *memoizedFS) GetFile(ctx context.Context, uri span.URI) (source.FileHandle, error) {
	id, mtime, err := robustio.GetFileID(uri.Filename())
	if err != nil {
		// file does not exist
		return &DiskFile{
			err: err,
			uri: uri,
		}, nil
	}

	// We check if the file has changed by comparing modification times. Notably,
	// this is an imperfect heuristic as various systems have low resolution
	// mtimes (as much as 1s on WSL or s390x builders), so we only cache
	// filehandles if mtime is old enough to be reliable, meaning that we don't
	// expect a subsequent write to have the same mtime.
	//
	// The coarsest mtime precision we've seen in practice is 1s, so consider
	// mtime to be unreliable if it is less than 2s old. Capture this before
	// doing anything else.
	recentlyModified := time.Since(mtime) < 2*time.Second

	fs.mu.Lock()
	fhs, ok := fs.filesByID[id]
	// All aliases share fhs[0]'s modTime, so checking the first entry suffices.
	if ok && fhs[0].modTime.Equal(mtime) {
		var fh *DiskFile
		// We have already seen this file and it has not changed.
		for _, h := range fhs {
			if h.uri == uri {
				fh = h
				break
			}
		}
		// No file handle for this exact URI. Create an alias, but share content.
		if fh == nil {
			newFH := *fhs[0]
			newFH.uri = uri
			fh = &newFH
			fhs = append(fhs, fh)
			fs.filesByID[id] = fhs
		}
		fs.mu.Unlock()
		return fh, nil
	}
	// Release the lock before reading: the read may be slow (disk IO, and it
	// waits on the ioLimit semaphore) and must not block other lookups.
	fs.mu.Unlock()

	// Unknown file, or file has changed. Read (or re-read) it.
	fh, err := readFile(ctx, uri, mtime) // ~25us
	if err != nil {
		return nil, err // e.g. cancelled (not: read failed)
	}

	fs.mu.Lock()
	if !recentlyModified {
		fs.filesByID[id] = []*DiskFile{fh}
	} else {
		// mtime too fresh to be trusted: drop any cached entry so the next
		// GetFile re-reads from disk.
		delete(fs.filesByID, id)
	}
	fs.mu.Unlock()
	return fh, nil
}

// ioLimit limits the number of parallel file reads per process.
var ioLimit = make(chan struct{}, 128)

// readFile reads the content of uri from disk, recording the given mtime in
// the resulting handle. A read failure is captured in the returned DiskFile
// rather than as an error; the error result is reserved for cancellation.
func readFile(ctx context.Context, uri span.URI, mtime time.Time) (*DiskFile, error) {
	// Acquire an IO token, unless the context is cancelled first.
	select {
	case <-ctx.Done():
		return nil, ctx.Err()
	case ioLimit <- struct{}{}:
	}
	defer func() { <-ioLimit }()

	ctx, done := event.Start(ctx, "cache.readFile", tag.File.Of(uri.Filename()))
	_ = ctx
	defer done()

	// It is possible that a race causes us to read a file with different file
	// ID, or whose mtime differs from the given mtime. However, in these cases
	// we expect the client to notify of a subsequent file change, and the file
	// content should be eventually consistent.
	data, readErr := os.ReadFile(uri.Filename()) // ~20us
	if readErr != nil {
		data = nil // just in case
	}
	fh := &DiskFile{
		uri:     uri,
		modTime: mtime,
		content: data,
		hash:    source.HashOf(data),
		err:     readErr,
	}
	return fh, nil
}
76 changes: 76 additions & 0 deletions gopls/internal/lsp/cache/fs_overlay.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,76 @@
// Copyright 2023 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

package cache

import (
"context"
"sync"

"golang.org/x/tools/gopls/internal/lsp/source"
"golang.org/x/tools/gopls/internal/span"
)

// An overlayFS is a source.FileSource that keeps track of overlays on top of a
// delegate FileSource.
type overlayFS struct {
	delegate source.FileSource // consulted for any URI without an overlay

	mu       sync.Mutex // mu guards overlays
	overlays map[span.URI]*Overlay
}

// newOverlayFS constructs an overlayFS with no overlays, reading through to
// the given delegate.
func newOverlayFS(delegate source.FileSource) *overlayFS {
	fs := &overlayFS{delegate: delegate}
	fs.overlays = make(map[span.URI]*Overlay)
	return fs
}

// Overlays returns a new unordered array of overlays.
//
// The snapshot is taken under fs.mu: the original code iterated fs.overlays
// without holding the mutex, racing with concurrent map mutation (GetFile
// guards its read of the same map with fs.mu). The returned slice is a copy
// and is safe to retain after the lock is released.
func (fs *overlayFS) Overlays() []*Overlay {
	fs.mu.Lock()
	defer fs.mu.Unlock()
	overlays := make([]*Overlay, 0, len(fs.overlays))
	for _, overlay := range fs.overlays {
		overlays = append(overlays, overlay)
	}
	return overlays
}

// GetFile returns the overlay for uri if one is open in the editor, and
// otherwise delegates to the underlying file source.
func (fs *overlayFS) GetFile(ctx context.Context, uri span.URI) (source.FileHandle, error) {
	fs.mu.Lock()
	overlay, ok := fs.overlays[uri]
	fs.mu.Unlock()
	if !ok {
		return fs.delegate.GetFile(ctx, uri)
	}
	return overlay, nil
}

// An Overlay is a file open in the editor. It may have unsaved edits.
// It implements the source.FileHandle interface.
type Overlay struct {
	uri     span.URI
	content []byte          // the editor buffer content
	hash    source.Hash     // hash of content
	version int32           // document version; presumably the LSP version — confirm against callers
	kind    source.FileKind // e.g. Go source vs. go.mod; semantics defined by source.FileKind

	// saved is true if a file matches the state on disk,
	// and therefore does not need to be part of the overlay sent to go/packages.
	saved bool
}

// URI returns the URI under which this overlay was opened.
func (o *Overlay) URI() span.URI {
	return o.uri
}

// FileIdentity returns the identity (URI + content hash) of the overlay.
func (o *Overlay) FileIdentity() source.FileIdentity {
	identity := source.FileIdentity{
		URI:  o.uri,
		Hash: o.hash,
	}
	return identity
}

// Read returns the buffer content; reading an overlay never fails.
func (o *Overlay) Read() ([]byte, error) {
	return o.content, nil
}

// Version returns the document version of the overlay.
func (o *Overlay) Version() int32 {
	return o.version
}

// Saved reports whether the overlay matches the state on disk.
func (o *Overlay) Saved() bool {
	return o.saved
}

// Kind returns the file kind of the overlay.
func (o *Overlay) Kind() source.FileKind {
	return o.kind
}
Loading

0 comments on commit ae242ec

Please sign in to comment.