Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
185 changes: 91 additions & 94 deletions conf/topics.json
Original file line number Diff line number Diff line change
@@ -1,122 +1,119 @@
{
"run.topic": {
"type": "object",
"properties": {
"app_id_snow": {
"description": "Application ID or ServiceNow identifier",
"type": "string"
},
"data_definition_id": {
"description": "Identifier for the data definition",
"type": "string"
},
"environment": {
"description": "Environment",
"type": "string"
},
"guid": {
"description": "Unique identifier for the event (GUID)",
"type": "string"
"event_id": {
"type": "string",
"description": "Unique identifier for the event (GUID), generated for each unique event, for de-duplication purposes"
},
"job_ref": {
"description": "Identifier of the job in its respective system.",
"type": "string"
"type": "string",
"description": "Identifier of the job in its respective system (e.g. Spark Application Id, Glue Job Id, EMR Step Id, etc)."
},
"message": {
"description": "Pipeline status message.",
"type": "string"
"tenant_id": {
"type": "string",
"description": "Application ID (4 letter code) or ServiceNow identifier related to the pipeline/domain/process owner (tenant of the tool)"
},
"source_app": {
"description": "Source application name",
"type": "string"
},
"status": {
"description": "Status of the run. Does not speak of the quality.",
"enum": [
"Finished",
"Failed",
"Killed"
],
"type": "string"
"type": "string",
"description": "Standardized source application name (aqueduct, unify, lum, etc)"
},
"timestamp_end": {
"description": "End timestamp of the run in epoch milliseconds",
"type": "number"
"source_app_version": {
"type": "string",
"description": "Source application version (SemVer preferred)"
},
"environment": {
"type": "string",
"description": "Environment (dev, uat, pre-prod, prod, test or others)"
},
"timestamp_start": {
"description": "Start timestamp of the run in epoch milliseconds",
"type": "number"
"type": "number",
"description": "Start timestamp of the run in epoch milliseconds"
},
"timestamp_end": {
"type": "number",
"description": "End timestamp of the run in epoch milliseconds"
},
"jobs": {
"type": "array",
"description": "List of individual jobs within the run",
"element_type": "object",
"object_schema": {
"catalog_id": {
"type": "string",
"description": "Identifier for the data definition (Glue/Hive) database and table name for example"
},
"status": {
"type": "string",
"enum": ["succeeded", "failed", "killed", "skipped"],
"description": "Status of the job."
},
"timestamp_start": {
"type": "number",
"description": "Start timestamp of a job that is a part of a run in epoch milliseconds"
},
"timestamp_end": {
"type": "number",
"description": "End timestamp of a job that is a part of a run in epoch milliseconds"
},
"message": {
"type": "string",
"description": "Job status/error message."
}
}
}
},
"required": [
"guid",
"app_id_snow",
"source_app",
"timestamp_start",
"timestamp_end",
"data_definition_id",
"status"
],
"type": "object"
"required": ["event_id", "job_ref", "tenant_id", "source_app", "source_app_version", "environment", "timestamp_start", "timestamp_end", "jobs"]
},
"edla.change.topic": {
"type": "object",
"properties": {
"app_id_snow": {
"description": "Application ID or ServiceNow identifier",
"type": "string"
"event_id": {
"type": "string",
"description": "Unique identifier for the event (GUID)"
},
"data_definition_id": {
"description": "Identifier for the data definition",
"type": "string"
"tenant_id": {
"type": "string",
"description": "Application ID or ServiceNow identifier"
},
"environment": {
"description": "Environment",
"type": "string"
"source_app": {
"type": "string",
"description": "Standardized source application name (aqueduct, unify, lum, etc)"
},
"format": {
"description": "Format of the data",
"type": "string"
"source_app_version": {
"type": "string",
"description": "Source application version (SemVer preferred)"
},
"guid": {
"description": "Unique identifier for the event (GUID)",
"type": "string"
"environment": {
"type": "string",
"description": "Environment (dev, uat, pre-prod, prod, test or others)"
},
"location": {
"description": "Location of the data",
"type": "string"
"timestamp_event": {
"type": "number",
"description": "Timestamp of the event in epoch milliseconds"
},
"catalog_id": {
"type": "string",
"description": "Identifier for the data definition (Glue/Hive) database and table name for example"
},
"operation": {
"description": "Operation performed",
"enum": [
"CREATE",
"UPDATE",
"ARCHIVE"
],
"type": "string"
},
"schema_link": {
"description": "Link to the data schema",
"type": "string"
"type": "string",
"enum": ["overwrite", "append", "archive", "delete"],
"description": "Operation performed"
},
"source_app": {
"description": "Source application name",
"type": "string"
"location": {
"type": "string",
"description": "Location of the data"
},
"timestamp_event": {
"description": "Timestamp of the event in epoch milliseconds",
"type": "number"
"format": {
"type": "string",
"description": "Format of the data (parquet, delta, crunch, etc)."
},
"format_options": {
"type": "object",
"description": "When possible, add additional options related to the format"
}
},
"required": [
"guid",
"app_id_snow",
"source_app",
"timestamp_event",
"data_definition_id",
"operation",
"location",
"format",
"schema_link"
],
"type": "object"
"required": ["event_id", "tenant_id", "source_app", "source_app_version", "environment", "timestamp_event", "catalog_id", "operation", "format"]
}
}
}
25 changes: 13 additions & 12 deletions scripts/notebook.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -84,17 +84,18 @@
" \"pathParameters\": {\"topic_name\": \"edla.change.topic\"},\n",
" \"headers\": {\"bearer\": jwtToken},\n",
" \"body\": json.dumps({\n",
" \"app_id_snow\": \"app-1234\",\n",
" \"data_definition_id\": \"data-def-5678\",\n",
" \"environment\": \"DEV\",\n",
" \"format\": \"JSON\",\n",
" \"guid\": \"550e8400-e29b-41d4-a716-446655440000\",\n",
" \"location\": \"s3://data-lake/customer_data\",\n",
" \"operation\": \"CREATE\",\n",
" \"schema_link\": \"https://schema-registry.company.com/schemas/data-def-5678\",\n",
" \"source_app\": \"DataIngestionApp\",\n",
" \"timestamp_event\": 1657896543210\n",
" })\n",
" \"event_id\": \"JupyterEventId\",\n",
" \"tenant_id\": \"JupyterTenantId\",\n",
" \"source_app\": \"JupyterSrc\",\n",
" \"source_app_version\": \"v2024-10-17\",\n",
" \"environment\": \"JupyterEnv\",\n",
" \"timestamp_event\": 1729602770000,\n",
" \"catalog_id\": \"TestCatalog\",\n",
" \"operation\": \"delete\",\n",
" \"location\": \"UnitTest\",\n",
" \"format\": \"TestFormat\",\n",
" \"formatOptions\": {\"Foo\" : \"Bar\"}\n",
" })\n",
"}, {})"
]
},
Expand Down Expand Up @@ -128,7 +129,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.12.2"
"version": "3.12.6"
}
},
"nbformat": 4,
Expand Down