Skip to content

Commit

Permalink
Introduced a dedicated pathspec object (Velocidex#1440)
Browse files Browse the repository at this point in the history
This replaces the url() notation in certain accessors with the new pathspec object, because pathspecs are more reliable.

It is now possible to nest zip files arbitrarily - if a compressed file is found in a zip file, the file is automatically extracted to a temp file for further processing.
  • Loading branch information
scudette authored Dec 18, 2021
1 parent ac4429e commit 461909b
Show file tree
Hide file tree
Showing 19 changed files with 760 additions and 241 deletions.
Binary file added artifacts/testdata/files/nested.zip
Binary file not shown.
42 changes: 39 additions & 3 deletions artifacts/testdata/server/testcases/zip.in.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -5,17 +5,53 @@ Queries:

- SELECT basename(path=FullPath) as Name, Size FROM zip_files

# The following use the deprecated url() notation which is still
# supported for now.

# Find txt files inside the zip files.
- LET hits = SELECT * from foreach(row=zip_files, query= {
select FullPath, Mtime, Size from glob(globs=url(scheme='file', path=FullPath, fragment='/**/*.txt').String, accessor='zip') } )
select FullPath, Mtime, Size from glob(
globs=url(
scheme='file',
path=FullPath,
fragment='/**/*.txt'), accessor='zip') } )

- SELECT url(parse=FullPath).Fragment as Name, Size, Mtime from hits

# Make sure we can read the contents of zip members.
- SELECT Data, Offset, basename(path=FullPath) as Name FROM foreach(row=hits, query={ SELECT *, FullPath from read_file(filenames=FullPath, accessor='zip')})

- SELECT read_file(filename=url(path=srcDir+'/artifacts/testdata/files/hi.gz'), accessor='gzip') AS Hi,
read_file(filename=url(path=srcDir+'/artifacts/testdata/files/goodbye.bz2'), accessor='bzip2') AS Bye
- SELECT read_file(
filename=srcDir+'/artifacts/testdata/files/hi.gz',
accessor='gzip') AS Hi,
read_file(
filename=srcDir+'/artifacts/testdata/files/goodbye.bz2',
accessor='bzip2') AS Bye
FROM scope()

# Same as above but now we use the new pathspec() objects.
- LET hits = SELECT * from foreach(row=zip_files, query= {
select FullPath, Mtime, Size from glob(
globs=pathspec(DelegateAccessor='file',
DelegatePath=FullPath,
Path='/**/*.txt'),
accessor='zip')
} )

- SELECT pathspec(parse=FullPath).Path as Name, Size, Mtime from hits

# Make sure we can read the contents of zip members.
- SELECT Data, Offset, basename(path=FullPath) as Name
FROM foreach(row=hits, query={
SELECT *, FullPath from read_file(filenames=FullPath, accessor='zip')
})

- SELECT read_file(
filename=srcDir+'/artifacts/testdata/files/hi.gz',
accessor='gzip') AS Hi,
read_file(
filename=srcDir+'/artifacts/testdata/files/goodbye.bz2',
accessor='bzip2') AS Bye
FROM scope()

# Test the unzip() plugin
Expand Down
21 changes: 19 additions & 2 deletions artifacts/testdata/server/testcases/zip.out.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ LET zip_files = SELECT FullPath, Size FROM glob( globs=srcDir+"/artifacts/testda
"Name": "test.zip",
"Size": 1064
}
]LET hits = SELECT * from foreach(row=zip_files, query= { select FullPath, Mtime, Size from glob(globs=url(scheme='file', path=FullPath, fragment='/**/*.txt').String, accessor='zip') } )[]SELECT url(parse=FullPath).Fragment as Name, Size, Mtime from hits[
]LET hits = SELECT * from foreach(row=zip_files, query= { select FullPath, Mtime, Size from glob( globs=url( scheme='file', path=FullPath, fragment='/**/*.txt'), accessor='zip') } )[]SELECT url(parse=FullPath).Fragment as Name, Size, Mtime from hits[
{
"Name": "test/secret.txt",
"Size": 1549,
Expand All @@ -15,7 +15,24 @@ LET zip_files = SELECT FullPath, Size FROM glob( globs=srcDir+"/artifacts/testda
"Offset": 0,
"Name": "secret.txt"
}
]SELECT read_file(filename=url(path=srcDir+'/artifacts/testdata/files/hi.gz'), accessor='gzip') AS Hi, read_file(filename=url(path=srcDir+'/artifacts/testdata/files/goodbye.bz2'), accessor='bzip2') AS Bye FROM scope()[
]SELECT read_file( filename=srcDir+'/artifacts/testdata/files/hi.gz', accessor='gzip') AS Hi, read_file( filename=srcDir+'/artifacts/testdata/files/goodbye.bz2', accessor='bzip2') AS Bye FROM scope()[
{
"Hi": "hello world\n",
"Bye": "goodbye world\n"
}
]LET hits = SELECT * from foreach(row=zip_files, query= { select FullPath, Mtime, Size from glob( globs=pathspec(DelegateAccessor='file', DelegatePath=FullPath, Path='/**/*.txt'), accessor='zip') } )[]SELECT pathspec(parse=FullPath).Path as Name, Size, Mtime from hits[
{
"Name": "test/secret.txt",
"Size": 1549,
"Mtime": "2019-02-12T11:47:10Z"
}
]SELECT Data, Offset, basename(path=FullPath) as Name FROM foreach(row=hits, query={ SELECT *, FullPath from read_file(filenames=FullPath, accessor='zip') })[
{
"Data": "Just some text:\n\n GNU AFFERO GENERAL PUBLIC LICENSE\n Version 3, 19 November 2007\n\n Copyright (C) 2007 Free Software Foundation, Inc. \u003chttps://fsf.org/\u003e\n Everyone is permitted to copy and distribute verbatim copies\n of this license document, but changing it is not allowed.\n\n Preamble\n\n The GNU Affero General Public License is a free, copyleft license for\nsoftware and other kinds of works, specifically designed to ensure\ncooperation with the community in the case of network server software.\n \n The licenses for most software and other practical works are designed\nto take away your freedom to share and change the works. By contrast,\nour General Public Licenses are intended to guarantee your freedom to\nshare and change all versions of a program--to make sure it remains free\nsoftware for all its users.\n \n When we speak of free software, we are referring to freedom, not\nprice. Our General Public Licenses are designed to make sure that you\nhave the freedom to distribute copies of free software (and charge for\nthem if you wish), that you receive source code or can get it if you\nwant it, that you can change the software or use pieces of it in new\nfree programs, and that you know you can do these things.\n\n Developers that use our General Public Licenses protect your rights\nwith two steps: (1) assert copyright on the software, and (2) offer\nyou this License which gives you legal permission to copy, distribute\nand/or modify the software.\n \n\n\nThis is my secret.\n",
"Offset": 0,
"Name": "secret.txt\"}"
}
]SELECT read_file( filename=srcDir+'/artifacts/testdata/files/hi.gz', accessor='gzip') AS Hi, read_file( filename=srcDir+'/artifacts/testdata/files/goodbye.bz2', accessor='bzip2') AS Bye FROM scope()[
{
"Hi": "hello world\n",
"Bye": "goodbye world\n"
Expand Down
3 changes: 2 additions & 1 deletion bin/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -361,7 +361,8 @@ func doDumpApiClientConfig() {
kingpin.FatalIfError(err, "Unable to encode config.")
}

fd, err := os.OpenFile(*config_api_client_output, os.O_WRONLY|os.O_CREATE|os.O_TRUNC, 0600)
fd, err := os.OpenFile(*config_api_client_output,
os.O_WRONLY|os.O_CREATE|os.O_TRUNC, 0600)
kingpin.FatalIfError(err, "Unable to open output file: ")

_, err = fd.Write(res)
Expand Down
6 changes: 4 additions & 2 deletions bin/gui.go
Original file line number Diff line number Diff line change
Expand Up @@ -116,7 +116,8 @@ func doGUI() {
serialized, err := yaml.Marshal(config_obj)
kingpin.FatalIfError(err, "Unable to create config.")

fd, err := os.OpenFile(server_config_path, os.O_WRONLY|os.O_CREATE|os.O_TRUNC, 0600)
fd, err := os.OpenFile(server_config_path,
os.O_WRONLY|os.O_CREATE|os.O_TRUNC, 0600)
kingpin.FatalIfError(err, "Open file %s", server_config_path)
_, err = fd.Write(serialized)
kingpin.FatalIfError(err, "Write file %s", server_config_path)
Expand All @@ -129,7 +130,8 @@ func doGUI() {
serialized, err = yaml.Marshal(client_config)
kingpin.FatalIfError(err, "Unable to create config.")

fd, err = os.OpenFile(client_config_path, os.O_WRONLY|os.O_CREATE|os.O_TRUNC, 0600)
fd, err = os.OpenFile(client_config_path,
os.O_WRONLY|os.O_CREATE|os.O_TRUNC, 0600)
kingpin.FatalIfError(err, "Open file %s", client_config_path)
_, err = fd.Write(serialized)
kingpin.FatalIfError(err, "Write file %s", client_config_path)
Expand Down
9 changes: 5 additions & 4 deletions constants/constants.go
Original file line number Diff line number Diff line change
Expand Up @@ -72,10 +72,11 @@ const (
// Number of clusters to cache in memory (default 100).
NTFS_CACHE_SIZE = "NTFS_CACHE_SIZE"

RAW_REG_CACHE_SIZE = "RAW_REG_CACHE_SIZE"
BINARY_CACHE_SIZE = "BINARY_CACHE_SIZE"
EVTX_FREQUENCY = "EVTX_FREQUENCY"
USN_FREQUENCY = "USN_FREQUENCY"
RAW_REG_CACHE_SIZE = "RAW_REG_CACHE_SIZE"
BINARY_CACHE_SIZE = "BINARY_CACHE_SIZE"
EVTX_FREQUENCY = "EVTX_FREQUENCY"
USN_FREQUENCY = "USN_FREQUENCY"
ZIP_FILE_CACHE_SIZE = "ZIP_FILE_CACHE_SIZE"
)

type key int
Expand Down
53 changes: 53 additions & 0 deletions file_store/test_utils/query.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
package test_utils

import (
"context"
"time"

"github.com/Velocidex/ordereddict"
config_proto "www.velocidex.com/golang/velociraptor/config/proto"
"www.velocidex.com/golang/velociraptor/logging"
"www.velocidex.com/golang/velociraptor/services"
vql_subsystem "www.velocidex.com/golang/velociraptor/vql"
"www.velocidex.com/golang/vfilter"
)

// RunQuery is a convenience helper that evaluates the VQL in query
// and gathers every row it emits into a slice.
//
// A scope is built from config_obj (using vql_subsystem.NullACLManager
// and a plain logger) with env as its environment, and is closed
// before returning. All statements parsed from query are evaluated in
// order under one shared 60 second timeout. An error is returned when
// the repository manager cannot be obtained or the query does not
// parse.
func RunQuery(
	config_obj *config_proto.Config,
	query string,
	env *ordereddict.Dict) ([]*ordereddict.Dict, error) {

	scope_builder := services.ScopeBuilder{
		Config:     config_obj,
		ACLManager: vql_subsystem.NullACLManager{},
		Logger: logging.NewPlainLogger(
			config_obj, &logging.FrontendComponent),
		Env: env,
	}

	repository_manager, err := services.GetRepositoryManager()
	if err != nil {
		return nil, err
	}

	scope := repository_manager.BuildScope(scope_builder)
	defer scope.Close()

	// One deadline covers the evaluation of every statement.
	ctx, cancel := context.WithTimeout(context.Background(), 60*time.Second)
	defer cancel()

	statements, err := vfilter.MultiParse(query)
	if err != nil {
		return nil, err
	}

	// Start from a non-nil empty slice so callers always get a slice
	// back, even when no rows are produced.
	result := make([]*ordereddict.Dict, 0)
	for _, statement := range statements {
		row_chan := statement.Eval(ctx, scope)
		for row := range row_chan {
			result = append(result, vfilter.RowToDict(ctx, scope, row))
		}
	}

	return result, nil
}
141 changes: 141 additions & 0 deletions glob/pathspec.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,141 @@
package glob

import (
"net/url"

errors "github.com/pkg/errors"
"www.velocidex.com/golang/velociraptor/json"
)

var (
// InvalidPathSpec is the sentinel error returned by
// PathSpecFromString when the input is empty or cannot be parsed
// as a URL.
InvalidPathSpec = errors.New("Invalid PathSpec")
)

/*
A PathSpec is a more precise indication of a path to open a source
of data. In Velociraptor, access to data is provided by the use of
"Accessors" - a registered driver capable of reading data from
certain sources.

Accessors can delegate to other accessors using the PathSpec. This
delegation allows an accessor to receive additional information in
order to properly create the filesystem abstraction.

For example, consider the "zip" accessor which is responsible for
reading compressed archives. In order to retrieve a file inside the
zip file, the accessor needs the following pieces of data:

1. A delegate accessor to use to open the underlying zip file.
2. A path to provide to the delegate accessor.
3. The name of the zip member to open.

For example the following path spec:

{"DelegateAccessor": "file",
"DelegatePath": "/tmp/file.zip",
"Path": "zip_member.exe"}

Provides all this information.

PathSpecs are supposed to be serialized into strings and passed as
the filename to plugins that require file paths. The PathSpec is
just a more detailed path representation and is treated everywhere
as a plain string (json encoded).

Therefore the following path spec is valid for a recursive path:

{"DelegateAccessor": "zip",
"DelegatePath": "{\"DelegateAccessor\": \"file\", \"DelegatePath\": \"/tmp/file.zip\", \"Path\": \"embedded.zip\"}",
"Path": "zip_member.exe"}

Given to the zip accessor, this PathSpec means to use the "zip"
accessor to open a member "embedded.zip" inside a file
"/tmp/file.zip", then to search within that embedded zip for a
"zip_member.exe".

For convenience, the PathSpec also supports a structured delegate so
the following serialization is also valid:

{"DelegateAccessor": "zip",
"Delegate": {
"DelegateAccessor": "file",
"DelegatePath": "/tmp/file.zip",
"Path": "embedded.zip"
},
"Path": "zip_member.exe"}

## Note:

In previous versions, the PathSpec abstraction was provided by
mapping URL parts to the fields above (scheme -> DelegateAccessor,
path -> DelegatePath, fragment -> Path). This proved problematic
because URL encoding is lossy and not robust enough for round
tripping of all paths.

It also produces difficult to read paths. The old URL way is
deprecated but still supported - it will eventually be dropped.
*/
type PathSpec struct {
// Name of the accessor used to open DelegatePath.
DelegateAccessor string `json:"DelegateAccessor,omitempty"`
// Path handed to the delegate accessor, as a plain string (which
// may itself be a serialized PathSpec).
DelegatePath string `json:"DelegatePath,omitempty"`
// Optional structured form of the delegate path; when set,
// GetDelegatePath() uses it in preference to DelegatePath.
Delegate *PathSpec `json:"Delegate,omitempty"`
// Path within the data opened through the delegate (e.g. the zip
// member name).
Path string `json:"Path,omitempty"`

// Records whether this pathspec was parsed from the legacy URL
// form, so that String() serializes it back into a URL for
// backwards compatibility. Unexported, so never part of the JSON.
url_based bool
}

// GetDelegatePath returns the delegate path as a string. When a
// structured Delegate is present its serialized form is returned,
// otherwise the plain DelegatePath field is used.
func (self PathSpec) GetDelegatePath() string {
	if self.Delegate == nil {
		return self.DelegatePath
	}

	return self.Delegate.String()
}

// String serializes the PathSpec. A PathSpec that was parsed from
// the legacy URL notation is written back as a URL (scheme, path and
// fragment carry DelegateAccessor, DelegatePath and Path); every
// other PathSpec is encoded as a JSON object.
func (self PathSpec) String() string {
	if !self.url_based {
		return json.MustMarshalString(self)
	}

	// Backwards compatibility: round trip into the same URL layout
	// that PathSpecFromString() accepts.
	legacy_url := url.URL{
		Scheme:   self.DelegateAccessor,
		Path:     self.DelegatePath,
		Fragment: self.Path,
	}

	return legacy_url.String()
}

// PathSpecFromString decodes the string form of a path into a
// PathSpec. Three notations are recognised:
//
//  1. A string starting with '{' is decoded as a JSON serialized
//     PathSpec. Any decoding error is returned together with the
//     (possibly partially filled) PathSpec.
//  2. A string whose URL scheme is a single character is taken to be
//     a Windows path and is stored unchanged in DelegatePath.
//  3. Anything else is treated as the deprecated URL notation:
//     scheme, path and fragment become DelegateAccessor,
//     DelegatePath and Path.
//
// InvalidPathSpec is returned for an empty string or for input the
// URL parser rejects.
func PathSpecFromString(parsed string) (*PathSpec, error) {
	if parsed == "" {
		return nil, InvalidPathSpec
	}

	// A leading brace marks a serialized JSON object.
	if parsed[0] == '{' {
		spec := &PathSpec{}
		return spec, json.Unmarshal([]byte(parsed), spec)
	}

	// Otherwise it may be a URL.
	legacy_url, err := url.Parse(parsed)
	if err != nil {
		return nil, InvalidPathSpec
	}

	spec := &PathSpec{}

	// A one letter scheme looks like a windows path (drive letter)
	// rather than a URL - keep the whole string as the path.
	if len(legacy_url.Scheme) == 1 {
		spec.DelegatePath = parsed
		return spec, nil
	}

	// Support urls for backwards compatibility, remembering the
	// origin so String() emits a URL again.
	spec.DelegateAccessor = legacy_url.Scheme
	spec.DelegatePath = legacy_url.Path
	spec.Path = legacy_url.Fragment
	spec.url_based = true

	return spec, nil
}
5 changes: 3 additions & 2 deletions uploads/file_based.go
Original file line number Diff line number Diff line change
Expand Up @@ -107,7 +107,7 @@ func (self *FileBasedUploader) Upload(
return result, nil
}

file, err := os.OpenFile(file_path, os.O_RDWR|os.O_CREATE, 0700)
file, err := os.OpenFile(file_path, os.O_RDWR|os.O_CREATE|os.O_TRUNC, 0700)
if err != nil {
scope.Log("Unable to open file %s: %s", file_path, err.Error())
return nil, err
Expand Down Expand Up @@ -217,7 +217,8 @@ func (self *FileBasedUploader) maybeCollectSparseFile(

// If there were any sparse runs, create an index.
if is_sparse {
writer, err := os.OpenFile(sanitized_name+".idx", os.O_RDWR|os.O_CREATE, 0700)
writer, err := os.OpenFile(sanitized_name+".idx",
os.O_RDWR|os.O_CREATE|os.O_TRUNC, 0700)
if err != nil {
return nil, err
}
Expand Down
10 changes: 5 additions & 5 deletions vql/filesystem/fixtures/TestReferenceCount.golden
Original file line number Diff line number Diff line change
@@ -1,22 +1,22 @@
[
{
"Base": "hello.zip#hello.txt",
"Base": "hello.txt",
"Data": "hello\n"
},
{
"Base": "hello.zip#hello1.txt",
"Base": "hello1.txt",
"Data": "hello1\n"
},
{
"Base": "hello.zip#hello2.txt",
"Base": "hello2.txt",
"Data": "hello2\n"
},
{
"Base": "hello.zip#hello3.txt",
"Base": "hello3.txt",
"Data": "hello3\n"
},
{
"Base": "hello.zip#hello4.txt",
"Base": "hello4.txt",
"Data": "hello4\n"
}
]
Loading

0 comments on commit 461909b

Please sign in to comment.