Skip to content

Commit 53d5aff

Browse files
ebeahan and webmat committed
Add --ref support for experimental artifacts (elastic#1063)
Co-authored-by: Mathieu Martin <webmat@gmail.com>
1 parent 46210a5 commit 53d5aff

File tree

7 files changed

+108
-8
lines changed

7 files changed

+108
-8
lines changed

CHANGELOG.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -51,6 +51,7 @@ All notable changes to this project will be documented in this file based on the
5151

5252
* Field details Jinja2 template components have been consolidated into one template #897
5353
* Add `[discrete]` marker before each section header in field details. #989
54+
* `--ref` now loads `experimental/schemas` based on git ref in addition to `schemas`. #1063
5455

5556

5657
## [1.6.0](https://github.com/elastic/ecs/compare/v1.5.0...v1.6.0)

USAGE.md

Lines changed: 17 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -188,6 +188,8 @@ And looking at a specific artifact, `../myprojects/out/generated/elasticsearch/7
188188
...
189189
```
190190

191+
`--include` can be used together with the `--ref` flag to merge custom fields into a targeted ECS version. See [`Ref`](#ref).
192+
191193
> NOTE: The `--include` mechanism will not validate custom YAML files prior to merging. This allows for modifying existing ECS fields in a custom schema without having to redefine all the mandatory field attributes.
192194
193195
#### Subset
@@ -235,12 +237,26 @@ It's also possible to combine `--include` and `--subset` together! Do note that
235237

236238
#### Ref
237239

238-
The `--ref` argument allows for passing a specific `git` tag (e.g. `v.1.5.0`) or commit hash (`1454f8b`) that will be used to build ECS artifacts.
240+
The `--ref` argument allows for passing a specific `git` tag (e.g. `v1.5.0`) or commit hash (`1454f8b`) that will be used to build ECS artifacts.
239241

240242
```
241243
$ python scripts/generator.py --ref v1.5.0
242244
```
243245
246+
The `--ref` argument loads field definitions from the [`./schemas`](./schemas) directory as of the specified git reference (branch, tag, etc.). When `experimental/schemas` is passed via `--include`, [`./experimental/schemas`](./experimental/schemas) is loaded from that same git reference as well.
247+
248+
Here's another example that loads both ECS fields and [experimental](experimental/README.md) changes *from branch "1.7"*, then adds custom fields on top.
249+
250+
```
251+
$ python scripts/generator.py --ref 1.7 --include experimental/schemas ../myproject/fields/custom --out ../myproject/out
252+
```
253+
254+
The command above will produce artifacts based on:
255+
256+
* main ECS field definitions as of branch 1.7
257+
* experimental ECS changes as of branch 1.7
258+
* custom fields in `../myproject/fields/custom` as they are on the filesystem
259+
244260
> Note: `--ref` depends on `git` being installed and on all expected commits/tags having been fetched from the ECS upstream repo. This is unlikely to be an issue unless you downloaded ECS as a zip archive from GitHub instead of cloning it.
245261
246262
#### Mapping & Template Settings

scripts/generator.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -63,7 +63,8 @@ def main():
6363

6464
def argument_parser():
6565
parser = argparse.ArgumentParser()
66-
parser.add_argument('--ref', action='store', help='git reference to use when building schemas')
66+
parser.add_argument('--ref', action='store', help='Loads fields definitions from `./schemas` subdirectory from specified git reference. \
67+
Note that "--include experimental/schemas" will also respect this git ref.')
6768
parser.add_argument('--include', nargs='+',
6869
help='include user specified directory of custom field definitions')
6970
parser.add_argument('--subset', nargs='+',

scripts/generators/ecs_helpers.py

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -114,6 +114,14 @@ def get_tree_by_ref(ref):
114114
return commit.tree
115115

116116

117+
def path_exists_in_git_tree(tree, file_path):
    """Report whether ``file_path`` can be looked up in ``tree``.

    Args:
        tree: object supporting ``tree[file_path]`` that raises ``KeyError``
            for a missing path (callers pass the tree returned by
            ``get_tree_by_ref``).
        file_path: path to look up by subscript.

    Returns:
        ``True`` if the lookup succeeds, ``False`` if it raises ``KeyError``.
    """
    try:
        # Only the lookup itself is guarded; its result is not needed.
        tree[file_path]
    except KeyError:
        return False
    else:
        return True
123+
124+
117125
def usage_doc_files():
118126
usage_docs_dir = os.path.join(os.path.dirname(__file__), '../../docs/usage')
119127
usage_docs_path = pathlib.Path(usage_docs_dir)

scripts/schema/loader.py

Lines changed: 20 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -51,9 +51,18 @@ def load_schemas(ref=None, included_files=[]):
5151
schema_files_raw = load_schema_files(ecs_helpers.ecs_files())
5252
fields = deep_nesting_representation(schema_files_raw)
5353

54-
# Custom additional files (never from git ref)
54+
EXPERIMENTAL_SCHEMA_DIR = 'experimental/schemas'
55+
56+
# Custom additional files
5557
if included_files and len(included_files) > 0:
5658
print('Loading user defined schemas: {0}'.format(included_files))
59+
# If --ref provided and --include loading experimental schemas
60+
if ref and EXPERIMENTAL_SCHEMA_DIR in included_files:
61+
exp_schema_files_raw = load_schemas_from_git(ref, target_dir=EXPERIMENTAL_SCHEMA_DIR)
62+
exp_fields = deep_nesting_representation(exp_schema_files_raw)
63+
fields = merge_fields(fields, exp_fields)
64+
included_files.remove(EXPERIMENTAL_SCHEMA_DIR)
65+
# Remaining additional custom files (never from git ref)
5766
custom_files = ecs_helpers.get_glob_files(included_files, ecs_helpers.YAML_EXT)
5867
custom_fields = deep_nesting_representation(load_schema_files(custom_files))
5968
fields = merge_fields(fields, custom_fields)
@@ -68,13 +77,18 @@ def load_schema_files(files):
6877
return fields_nested
6978

7079

71-
def load_schemas_from_git(ref):
80+
def load_schemas_from_git(ref, target_dir='schemas'):
    """Load and merge the ``.yml`` schema blobs found under ``target_dir`` at a git ref.

    Args:
        ref: git reference (e.g. tag, branch or commit hash) passed to
            ``ecs_helpers.get_tree_by_ref``.
        target_dir: directory inside that tree to read from; defaults to
            ``'schemas'``.

    Returns:
        dict built by merging, via ``ecs_helpers.safe_merge_dicts``, the result
        of ``read_schema_blob`` for each blob in ``tree[target_dir].blobs``
        whose name ends with ``.yml``.

    Raises:
        KeyError: if ``target_dir`` is not present in the tree for ``ref``.
    """
    tree = ecs_helpers.get_tree_by_ref(ref)

    # Handle the case where the target dir doesn't exist in the git ref:
    # fail up front with a message naming both the directory and the ref.
    if not ecs_helpers.path_exists_in_git_tree(tree, target_dir):
        raise KeyError(f"Target directory './{target_dir}' not present in git ref '{ref}'!")

    fields_nested = {}
    for blob in tree[target_dir].blobs:
        if blob.name.endswith('.yml'):
            new_fields = read_schema_blob(blob, ref)
            fields_nested = ecs_helpers.safe_merge_dicts(fields_nested, new_fields)
    return fields_nested
7993

8094

scripts/tests/test_ecs_helpers.py

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -99,11 +99,19 @@ def test_list_subtract(self):
9999
self.assertEqual(ecs_helpers.list_subtract(['a', 'b'], ['a']), ['b'])
100100
self.assertEqual(ecs_helpers.list_subtract(['a', 'b'], ['a', 'c']), ['b'])
101101

102+
# git helper tests
103+
102104
def test_get_tree_by_ref(self):
    """The tree resolved for tag v1.5.0 must carry the pinned hexsha."""
    expected_hexsha = '4449df245f6930d59bcd537a5958891261a9476b'
    resolved_tree = ecs_helpers.get_tree_by_ref('v1.5.0')
    self.assertEqual(resolved_tree.hexsha, expected_hexsha)
106108

109+
def test_path_exists_in_git_tree(self):
    """Against the v1.6.0 tree, expect 'nonexistant' to be missing and 'schemas' present."""
    path_exists = ecs_helpers.path_exists_in_git_tree
    v1_6_tree = ecs_helpers.get_tree_by_ref('v1.6.0')
    self.assertFalse(path_exists(v1_6_tree, 'nonexistant'))
    self.assertTrue(path_exists(v1_6_tree, 'schemas'))
114+
107115

108116
if __name__ == '__main__':
109117
unittest.main()

scripts/tests/unit/test_schema_loader.py

Lines changed: 52 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -79,6 +79,21 @@ def test_load_schemas_no_custom(self):
7979
fields['process']['fields']['thread'].keys(),
8080
"Fields containing nested fields should at least have the 'fields' subkey")
8181

82+
def test_load_schemas_git_ref(self):
    """Schemas loaded with ref='v1.6.0' expose the expected nesting subkeys."""
    fields = loader.load_schemas(ref='v1.6.0')

    # Top-level fieldset entry.
    expected_schema_keys = ['field_details', 'fields', 'schema_details']
    self.assertEqual(
        expected_schema_keys,
        sorted(fields['process'].keys()),
        "Schemas should have 'field_details', 'fields' and 'schema_details' subkeys")

    # Leaf field entry.
    pid_keys = list(fields['process']['fields']['pid'].keys())
    self.assertEqual(
        ['field_details'],
        pid_keys,
        "Leaf fields should have only the 'field_details' subkey")

    # Intermediate entry that itself contains nested fields.
    thread_keys = fields['process']['fields']['thread'].keys()
    self.assertIn(
        'fields',
        thread_keys,
        "Fields containing nested fields should at least have the 'fields' subkey")
96+
8297
@mock.patch('schema.loader.read_schema_file')
8398
def test_load_schemas_fail_on_accidental_fieldset_redefinition(self, mock_read_schema):
8499
mock_read_schema.side_effect = [
@@ -124,6 +139,43 @@ def test_nest_schema_raises_on_missing_schema_name(self):
124139
with self.assertRaisesRegex(ValueError, 'incomplete.yml'):
125140
loader.nest_schema([{'description': 'just a description'}], 'incomplete.yml')
126141

142+
def test_load_schemas_from_git(self):
    """Loading 'schemas' at tag v1.0.0 must yield exactly the expected fieldset names."""
    fields = loader.load_schemas_from_git('v1.0.0', target_dir='schemas')
    expected_fieldsets = [
        'agent',
        'base',
        'client',
        'cloud',
        'container',
        'destination',
        'ecs',
        'error',
        'event',
        'file',
        'geo',
        'group',
        'host',
        'http',
        'log',
        'network',
        'observer',
        'organization',
        'os',
        'process',
        'related',
        'server',
        'service',
        'source',
        'url',
        'user',
        'user_agent',
    ]
    self.assertEqual(
        expected_fieldsets,
        sorted(fields.keys()),
        "Raw schema fields should have expected fieldsets for v1.0.0")
174+
175+
def test_load_schemas_from_git_missing_target_directory(self):
    """Expect a KeyError naming the ref when 'experimental' is requested at v1.5.0."""
    expected_message = "not present in git ref 'v1.5.0'"
    with self.assertRaisesRegex(KeyError, expected_message):
        loader.load_schemas_from_git('v1.5.0', target_dir='experimental')
178+
127179
# nesting stuff
128180

129181
def test_nest_fields(self):

0 commit comments

Comments
 (0)