Skip to content

Commit

Permalink
Allows arbitrary length filenames in filestore. (Velocidex#3736)
Browse files Browse the repository at this point in the history
On Linux file names must be less than 255 bytes. The filestore attempts
to preserve and sanitize the original filename of uploaded files, which
means that in some cases the filestore filename can easily exceed the
hard limit imposed by the OS.

This PR switches to a hash based compression when the filename exceeds
250 chars. This replaces the filestore filename with a hash, and the
full component is stored in the datastore separately. This allows us to
store arbitrary length filenames in the filestore.
  • Loading branch information
scudette authored Sep 7, 2024
1 parent 28eef1a commit 2169f99
Show file tree
Hide file tree
Showing 23 changed files with 443 additions and 215 deletions.
6 changes: 3 additions & 3 deletions artifacts/definitions/Server/Utils/StartHuntExample.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ description: |
```
This will allow users with the `COLLECT_BASIC` permission to also
collect it. Once collected the artifact specifies an impersonation
collect it. Once collected the artifact specifies the impersonate
field to `admin` which will cause it to run under the `admin` user's
permissions.
Expand All @@ -40,9 +40,9 @@ description: |
type: SERVER

# Collect this artifact under the admin user permissions.
impersonation: admin
impersonate: admin

source:
sources:
- query: |
-- This query will run with admin ACLs.
SELECT hunt(
Expand Down
2 changes: 1 addition & 1 deletion bin/hunts.go
Original file line number Diff line number Diff line change
Expand Up @@ -95,7 +95,7 @@ func doHuntReconstruct() error {
return err
}
fmt.Printf("Rebuilding %v to %v\n", hunt.HuntId,
target.AsDatastoreFilename(config_obj))
target.String())
}
}
}
Expand Down
39 changes: 39 additions & 0 deletions datastore/datastore_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ import (
"errors"
"os"
"sort"
"strings"
"sync"
"testing"
"time"
Expand Down Expand Up @@ -328,6 +329,44 @@ func (self BaseTestSuite) TestListChildrenSubdirs() {
"/Root/item"}, asStrings(children))
}

// Make sure all the other data stores handle very long filenames
func (self BaseTestSuite) TestVeryLongFilename() {
	message := &crypto_proto.VeloMessage{Source: "Server"}

	// 18 chars repeated 100 times = 1800 bytes, far beyond the 255
	// byte filename limit imposed by Linux filesystems.
	very_long_filename := strings.Repeat("Very Long Filename", 100)
	assert.Equal(self.T(), 1800, len(very_long_filename))

	path := path_specs.NewUnsafeDatastorePath("longfiles", very_long_filename)
	filename := datastore.AsDatastoreFilename(
		self.datastore, self.config_obj, path)

	// The on-disk filename should be much shorter than the raw
	// component because it is compressed into a hash.
	assert.True(self.T(), len(filename) < 250)

	err := self.datastore.SetSubject(
		self.config_obj, path, message)
	assert.NoError(self.T(), err)

	read_message := &crypto_proto.VeloMessage{}
	err = self.datastore.GetSubject(self.config_obj,
		path, read_message)
	assert.NoError(self.T(), err)

	assert.Equal(self.T(), message.Source, read_message.Source)

	// Now test that ListChildren works properly.
	children, err := self.datastore.ListChildren(
		self.config_obj, path_specs.NewUnsafeDatastorePath("longfiles"))
	assert.NoError(self.T(), err)

	// Exactly one child was written under longfiles/ - without this
	// check the loop below would pass vacuously on an empty result.
	assert.Equal(self.T(), 1, len(children))

	for _, child := range children {
		// ListChildren must uncompress the hashed component back to
		// the original full-length filename.
		assert.Equal(self.T(), very_long_filename, child.Base())
	}
}

func benchmarkSearchClient(b *testing.B,
data_store datastore.DataStore,
config_obj *config_proto.Config) {
Expand Down
62 changes: 36 additions & 26 deletions datastore/filebased.go
Original file line number Diff line number Diff line change
Expand Up @@ -82,8 +82,8 @@ func (self *FileBaseDataStore) GetSubject(

defer InstrumentWithDelay("read", "FileBaseDataStore", urn)()

Trace(config_obj, "GetSubject", urn)
serialized_content, err := readContentFromFile(config_obj, urn)
Trace(self, config_obj, "GetSubject", urn)
serialized_content, err := readContentFromFile(self, config_obj, urn)
if err != nil {
return fmt.Errorf("While opening %v: %w", urn.AsClientPath(),
os.ErrNotExist)
Expand Down Expand Up @@ -150,22 +150,22 @@ func (self *FileBaseDataStore) SetSubjectWithCompletion(
}
}()

Trace(config_obj, "SetSubject", urn)
Trace(self, config_obj, "SetSubject", urn)

// Encode as JSON
if urn.Type() == api.PATH_TYPE_DATASTORE_JSON {
serialized_content, err := protojson.Marshal(message)
if err != nil {
return err
}
return writeContentToFile(config_obj, urn, serialized_content)
return writeContentToFile(self, config_obj, urn, serialized_content)
}
serialized_content, err := proto.Marshal(message)
if err != nil {
return errors.Wrap(err, 0)
}

return writeContentToFile(config_obj, urn, serialized_content)
return writeContentToFile(self, config_obj, urn, serialized_content)
}

func (self *FileBaseDataStore) DeleteSubjectWithCompletion(
Expand All @@ -187,9 +187,9 @@ func (self *FileBaseDataStore) DeleteSubject(

defer InstrumentWithDelay("delete", "FileBaseDataStore", urn)()

Trace(config_obj, "DeleteSubject", urn)
Trace(self, config_obj, "DeleteSubject", urn)

err := os.Remove(urn.AsDatastoreFilename(config_obj))
err := os.Remove(AsDatastoreFilename(self, config_obj, urn))

// It is ok to remove a file that does not exist.
if err != nil && os.IsExist(err) {
Expand All @@ -201,13 +201,13 @@ func (self *FileBaseDataStore) DeleteSubject(
return nil
}

func listChildren(config_obj *config_proto.Config,
func (self *FileBaseDataStore) listChildren(config_obj *config_proto.Config,
urn api.DSPathSpec) ([]os.FileInfo, error) {

defer InstrumentWithDelay("list", "FileBaseDataStore", urn)()

children, err := utils.ReadDirUnsorted(
urn.AsDatastoreDirectory(config_obj))
AsDatastoreDirectory(self, config_obj, urn))
if err != nil {
if os.IsNotExist(err) {
return []os.FileInfo{}, nil
Expand Down Expand Up @@ -237,9 +237,9 @@ func (self *FileBaseDataStore) ListChildren(
urn api.DSPathSpec) (
[]api.DSPathSpec, error) {

TraceDirectory(config_obj, "ListChildren", urn)
TraceDirectory(self, config_obj, "ListChildren", urn)

all_children, err := listChildren(config_obj, urn)
all_children, err := self.listChildren(config_obj, urn)
if err != nil {
return nil, err
}
Expand All @@ -257,13 +257,18 @@ func (self *FileBaseDataStore) ListChildren(
return children[i].ModTime().UnixNano() < children[j].ModTime().UnixNano()
})

db, err := GetDB(config_obj)
if err != nil {
return nil, err
}

// Slice the result according to the required offset and count.
result := make([]api.DSPathSpec, 0, len(children))
for _, child := range children {
var child_pathspec api.DSPathSpec

if child.IsDir() {
name := utils.UnsanitizeComponent(child.Name())
name := UncompressComponent(db, config_obj, child.Name())
result = append(result, urn.AddUnsafeChild(name).SetDir())
continue
}
Expand All @@ -275,7 +280,8 @@ func (self *FileBaseDataStore) ListChildren(
continue
}

name := utils.UnsanitizeComponent(child.Name()[:len(extension)])
name := UncompressComponent(db,
config_obj, child.Name()[:len(extension)])

// Skip over files that do not belong in the data store.
if spec_type == api.PATH_TYPE_DATASTORE_UNKNOWN {
Expand All @@ -294,14 +300,15 @@ func (self *FileBaseDataStore) ListChildren(
// Called to close all db handles etc. Not thread safe.
func (self *FileBaseDataStore) Close() {}

func writeContentToFile(config_obj *config_proto.Config,
func writeContentToFile(
db DataStore, config_obj *config_proto.Config,
urn api.DSPathSpec, data []byte) error {

if config_obj.Datastore == nil {
return datastoreNotConfiguredError
}

filename := urn.AsDatastoreFilename(config_obj)
filename := AsDatastoreFilename(db, config_obj, urn)

	// Truncate the file immediately so we don't need to make a second
// syscall. Empirically on Linux, a truncate call always works,
Expand Down Expand Up @@ -339,13 +346,14 @@ func writeContentToFile(config_obj *config_proto.Config,
}

func readContentFromFile(
config_obj *config_proto.Config, urn api.DSPathSpec) ([]byte, error) {
db DataStore, config_obj *config_proto.Config,
urn api.DSPathSpec) ([]byte, error) {

if config_obj.Datastore == nil {
return nil, datastoreNotConfiguredError
}

file, err := os.Open(urn.AsDatastoreFilename(config_obj))
file, err := os.Open(AsDatastoreFilename(db, config_obj, urn))
if err == nil {
defer file.Close()

Expand All @@ -363,9 +371,8 @@ func readContentFromFile(
if os.IsNotExist(err) &&
urn.Type() == api.PATH_TYPE_DATASTORE_JSON {

file, err := os.Open(urn.
SetType(api.PATH_TYPE_DATASTORE_PROTO).
AsDatastoreFilename(config_obj))
file, err := os.Open(AsDatastoreFilename(
db, config_obj, urn.SetType(api.PATH_TYPE_DATASTORE_PROTO)))

if err == nil {
defer file.Close()
Expand All @@ -382,30 +389,33 @@ func readContentFromFile(
return nil, errors.Wrap(err, 0)
}

func Trace(config_obj *config_proto.Config,
func Trace(
db DataStore,
config_obj *config_proto.Config,
name string, filename api.DSPathSpec) {

return

fmt.Printf("Trace FileBaseDataStore: %v: %v\n", name,
filename.AsDatastoreFilename(config_obj))
AsDatastoreFilename(db, config_obj, filename))
}

func TraceDirectory(config_obj *config_proto.Config,
func TraceDirectory(
db DataStore, config_obj *config_proto.Config,
name string, filename api.DSPathSpec) {

return

fmt.Printf("Trace FileBaseDataStore: %v: %v\n", name,
filename.AsDatastoreDirectory(config_obj))
AsDatastoreDirectory(db, config_obj, filename))
}

// Support RawDataStore interface
func (self *FileBaseDataStore) GetBuffer(
config_obj *config_proto.Config,
urn api.DSPathSpec) ([]byte, error) {

return readContentFromFile(config_obj, urn)
return readContentFromFile(self, config_obj, urn)
}

func (self *FileBaseDataStore) Error() error {
Expand All @@ -431,7 +441,7 @@ func (self *FileBaseDataStore) SetBuffer(
return err
}

err = writeContentToFile(config_obj, urn, data)
err = writeContentToFile(self, config_obj, urn, data)
if completion != nil &&
!utils.CompareFuncs(completion, utils.SyncCompleter) {
completion()
Expand Down
1 change: 1 addition & 0 deletions datastore/filebased_generic.go
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
//go:build !linux
// +build !linux

package datastore
Expand Down
27 changes: 23 additions & 4 deletions datastore/filebased_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -78,8 +78,8 @@ func (self FilebasedTestSuite) TestFullDiskErrors() {
assert.NoError(self.T(), err)

// Fill the disk now
fd, err := os.OpenFile(
pad_path.AsDatastoreFilename(self.config_obj),
fd, err := os.OpenFile(datastore.AsDatastoreFilename(
self.datastore, self.config_obj, pad_path),
os.O_RDWR|os.O_CREATE|os.O_TRUNC, 0660)
assert.NoError(self.T(), err)
fillUpDisk(fd)
Expand Down Expand Up @@ -109,8 +109,9 @@ func (self FilebasedTestSuite) TestGetSubjectOfEmptyFileIsError() {
path := path_specs.NewUnsafeDatastorePath("test")

// Create an empty file
fd, err := os.OpenFile(
path.AsDatastoreFilename(self.config_obj), os.O_RDWR|os.O_CREATE|os.O_TRUNC, 0660)
fd, err := os.OpenFile(datastore.AsDatastoreFilename(
self.datastore, self.config_obj, path),
os.O_RDWR|os.O_CREATE|os.O_TRUNC, 0660)
assert.NoError(self.T(), err)
fd.Close()

Expand All @@ -131,6 +132,24 @@ func (self FilebasedTestSuite) TestSetGetJSON() {
// self.DumpDirectory()
}

// On Linux the maximum size of a filename is 255 bytes. This means
// that with the addition of unicode escapes we might exceed this even
// with very short filenames.
func (self FilebasedTestSuite) TestVeryLongFilenameHashEncoding() {
	// Build an 1800 byte component - far larger than the 255 byte
	// limit Linux imposes on a single filename.
	long_component := strings.Repeat("Very Long Filename", 100)
	assert.Equal(self.T(), len(long_component), 1800)

	path_spec := path_specs.NewUnsafeDatastorePath(
		"longfiles", long_component)
	on_disk := datastore.AsDatastoreFilename(
		self.datastore, self.config_obj, path_spec)

	// The on-disk name must fit well under the OS limit because the
	// long component is replaced by its hash.
	assert.True(self.T(), len(on_disk) < 250)

	// The hash encoding is deterministic, so the exact basename is
	// predictable.
	assert.Equal(self.T(), filepath.Base(on_disk),
		"#8ad0b37a7718f0403aa86f9c6bcfff35ef6ad39f.json.db")
}

func (self *FilebasedTestSuite) SetupTest() {
var err error
self.dirname, err = tempfile.TempDir("datastore_test")
Expand Down
Loading

0 comments on commit 2169f99

Please sign in to comment.