Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

NO-ISSUE: Ensure that hard links are accounted for when measuring disk usage #7217

Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
NO-ISSUE: Ensure that hard links are accounted for when measuring dis…
…k usage.

When we measure the disk usage for metrics, we need to ensure that hard links are accounted for as these may lead to an inaccurate count.
This PR modifies the gathering of filesizes slightly to perform a stat on each file and take into account the unique inode number.

Hard links and actual files share the same inode number as this is a reference to the underlying data.
  • Loading branch information
paul-maidment committed Jan 22, 2025
commit 6ac1134866ae16825f542692bcde80f8a8377792
14 changes: 13 additions & 1 deletion internal/metrics/disk_stats_helper.go
Original file line number Diff line number Diff line change
@@ -1,8 +1,10 @@
package metrics

import (
"fmt"
"os"
"path/filepath"
"syscall"

"github.com/pkg/errors"
"golang.org/x/sys/unix"
Expand All @@ -22,12 +24,22 @@ func NewOSDiskStatsHelper() *OSDiskStatsHelper {

func (c *OSDiskStatsHelper) getUsedBytesInDirectory(directory string) (uint64, error) {
var totalBytes uint64
// Maintain a map of inodes we have seen so that we don't double count storage
seenInodes := make(map[uint64]bool)
err := filepath.Walk(directory, func(path string, fileInfo os.FileInfo, err error) error {
if err != nil {
return err
}
if !fileInfo.IsDir() {
// We need to ensure that the size check is based on inodes and not just the sizes gleaned from files.
// we should ensure that we have not seen a particular inode for a given file before.
// this is because there are hard links in use and we need to account for this.
stat, ok := fileInfo.Sys().(*syscall.Stat_t)
paul-maidment marked this conversation as resolved.
Show resolved Hide resolved
if !ok {
return fmt.Errorf("unable to determine stat information for path %s", path)
}
if !fileInfo.IsDir() && !seenInodes[stat.Ino] {
totalBytes += uint64(fileInfo.Size())
seenInodes[stat.Ino] = true
}
return nil
})
Expand Down
44 changes: 43 additions & 1 deletion internal/metrics/os_disk_stats_helper_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,9 @@ package metrics

import (
"os"
"path/filepath"

"github.com/google/uuid"
. "github.com/onsi/ginkgo"
. "github.com/onsi/gomega"
)
Expand All @@ -24,13 +26,14 @@ var _ = Describe("OS disk stats helper", func() {
os.RemoveAll(tempDir)
})

var writeDummyFile = func(sizeInBytes int64) {
var writeDummyFile = func(sizeInBytes int64) string {
file, err := os.CreateTemp(tempDir, "dummy-file")
Expect(err).ToNot(HaveOccurred())
defer file.Close()

err = file.Truncate(sizeInBytes)
Expect(err).ToNot(HaveOccurred())
return file.Name()
}

It("should retrieve correct stats for an empty directory", func() {
Expand All @@ -49,4 +52,43 @@ var _ = Describe("OS disk stats helper", func() {
Expect(freeBytes).To(BeNumerically(">", 0))
})

It("hardlinks should be correctly handled", func() {
writeDummyFile(16384)

// Create a hardlink to a second file
path := writeDummyFile(16384)
link := filepath.Join(tempDir, "ln_"+uuid.NewString())
err := os.Link(path, link)
Expect(err).ToNot((HaveOccurred()))

usageBytes, _, err := diskStatsHelper.GetDiskUsage(tempDir)
Expect(err).ToNot(HaveOccurred())

// We should only be counting the size of the two actual files.
Expect(usageBytes).To(BeEquivalentTo(32768))

// Delete the main file link
err = os.Remove(path)
Expect(err).ToNot(HaveOccurred())

// Grab usage again
usageBytes, _, err = diskStatsHelper.GetDiskUsage(tempDir)
Expect(err).ToNot(HaveOccurred())

// We expect the same result as the hardlink is still pointing to the old file
Expect(usageBytes).To(BeEquivalentTo(32768))

// Now get rid of the link
err = os.Remove(link)
Expect(err).ToNot(HaveOccurred())

// Grab usage again
usageBytes, _, err = diskStatsHelper.GetDiskUsage(tempDir)
Expect(err).ToNot(HaveOccurred())

// We expect to use less space as all references to the file inodes have been removed.
Expect(usageBytes).To(BeEquivalentTo(16384))

})

})