diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index 7707654..300d27e 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -16,7 +16,7 @@ jobs: uses: rlespinasse/github-slug-action@3.5.1 - name: Pack it up! - run: tar -czf /tmp/${{ github.event.repository.name }}-${{ env.GITHUB_SHA_SHORT }}-${{ env.GITHUB_REF_SLUG_CS }}.crbl . + run: tar -czf /tmp/${{ github.event.repository.name }}-${{ env.GITHUB_SHA_SHORT }}-${{ env.GITHUB_REF_SLUG_CS }}.crbl data default LICENSE package.json README.md - name: Release if: ${{ success() }} diff --git a/README.md b/README.md index 6ff9422..b8f2d76 100644 --- a/README.md +++ b/README.md @@ -10,13 +10,13 @@ You should expect to see 15-30% reduction in the size of your Palo Alto Firewall ## Installation --- -1. Download the most recent .crbl file from the repo [releases page](https://github.com/criblpacks/cribl-palo-alto-networks/releases). -2. Create a Route with with a filter for your Palo Alto Firewall events. A sample filter to match all events: +1. Install this pack from the [Cribl Pack Dispensary](https://packs.cribl.io), use the Git clone feature inside Cribl Stream, or download the most recent .crbl file from the repo [releases page](https://github.com/criblpacks/cribl-palo-alto-networks/releases). +2. Create a Route with a filter for your Palo Alto Firewall events. A sample filter to match all events: ``` (sourcetype=='pan:log' || sourcetype=='pan_log' || /^[^,]+,[^,]+,[^,]+,(THREAT|TRAFFIC|SYSTEM|CONFIG|HIPMATCH|CORRELATION|USERID|GLOBALPROTECT),/.test(_raw)) ``` -3. Select the `PAN` pack as the pipeline. -4. Configure the pack pipelines with the appropriate index for your Palo Alto logs. By default the index field will be set to `pan_logs`. +3. Select the `cribl-palo-alto-networks` pack as the pipeline. +4. Configure the Global Variable (`pan_default_index`) inside the Pack with the appropriate Splunk index for your Palo Alto logs. 
By default, the index field will be set to `pan_logs`. ### Configure Device Information This pack assumes all of your firewalls use UTC/GMT for their time zone configuration. If you use local time zones, please configure the `device_info.csv` lookup file (located in the pack's Knowledge content). @@ -33,6 +33,13 @@ FW-.*,Etc/GMT+1 ## Release Notes --- +### Version 1.1.0 - 2022-04-12 +* Fixes incorrect sourcetype set in Decryption pipeline +* Add explanations why fields are dropped +* New feature: use Global Variables to define default `index` and `source` field values. Change in one location instead of every pipeline! +* Rewrites pipeline logic to separate parser reserialize function into separate parser extract and serialize functions +* New feature: set the global variable `pan_device_name_as_host` to use set the `host` field value from the `dvc_host` field value instead of the syslog header. + ### Version 1.0.0 - 2022-03-22 * Update to version 1.0.0 - major release for new Pack Dispensary 🎉 * Changes Pack ID from `PAN` to `cribl-palo-alto-networks` to match naming convention of Cribl built Packs. diff --git a/default/pipelines/pan_config/conf.yml b/default/pipelines/pan_config/conf.yml index 75041bf..f5eb650 100644 --- a/default/pipelines/pan_config/conf.yml +++ b/default/pipelines/pan_config/conf.yml @@ -1,5 +1,8 @@ output: default -groups: {} +groups: + yicqwn: + name: Reserialization of Events + index: 6 asyncFuncTimeout: 1000 functions: - id: comment @@ -14,24 +17,29 @@ functions: #2. Simple eval to set the host, sourcetype, source, index, and cleanup the _raw message to remove the syslog header - #3-4. Use the Auto Timestamp function to set the event timestamp to the "generated time" + #3. The parser function extracts all field values to the top level for event processing - #5. Reshape the events using the parser to remove unnecessary fields + #4. 
If the pan_device_name_as_host Global Variable is set to true, use the dvc_name field as the host value + + #5-6. Use the Auto Timestamp function to set the event timestamp to the "generated time" + + #7-10. Reserialization of Events back into CSV dropping fields that are not relevant - id: eval filter: "true" disabled: null conf: add: - name: host - value: _raw.match(/[A-Z][a-z]{2}\s{1,2}\d{1,2}\s\d{2}:\d{2}:\d{2}\s([^\s]+)\s/)[1] || host + value: _raw.match(/[A-Z][a-z]{2}\s{1,2}\d{1,2}\s\d{2}:\d{2}:\d{2}\s([^\s]+)\s/)[1] + || host + - value: (message || _raw).substring((message || _raw).indexOf(',')) + name: _raw - value: "'pan:config'" name: sourcetype - name: source - value: source || 'pan:syslog' - - value: index || 'pan_logs' + value: source || C.vars.pan_default_source + - value: index || C.vars.pan_default_index name: index - - value: (message || _raw).substring((message || _raw).indexOf(',')) - name: _raw keep: - _raw - _time @@ -41,6 +49,49 @@ functions: - sourcetype remove: - "*" + - id: serde + filter: "true" + disabled: null + conf: + mode: extract + type: csv + srcField: _raw + fields: + - future_use1 + - receive_time + - serial_number + - type + - log_subtype + - version + - generated_time + - host_name + - vsys + - command + - admin + - client + - result + - configuration_path + - sequence_number + - action_flags + - before_change_detail + - after_change_detail + - devicegroup_level1 + - devicegroup_level2 + - devicegroup_level3 + - devicegroup_level4 + - vsys_name + - dvc_name + - dvc_group + - audit_comment + - id: eval + filter: C.vars.pan_device_name_as_host + disabled: null + conf: + add: + - name: host + value: dvc_name || host + description: If Global Variable is true, set the host field value to the value + from the dvc_name field - id: lookup filter: "true" disabled: null @@ -57,32 +108,54 @@ functions: outFields: - lookupField: tz eventField: __tz - description: Add time zone offset as an internal field to the event. 
Uses the host - field to look up the value from device_info.csv file. + description: Add time zone offset as an internal field to the event. Uses the + host field to look up the value from device_info.csv file. - id: auto_timestamp filter: "true" disabled: null conf: - srcField: _raw + srcField: generated_time dstField: _time defaultTimezone: utc - timeExpression: "__tz ? C.Time.adjustTZ(time.getTime(), 'UTC', __tz) / 1000 : time.getTime() / 1000" + timeExpression: "__tz ? C.Time.adjustTZ(time.getTime(), 'UTC', __tz) / 1000 : + time.getTime() / 1000" offset: 0 maxLen: 150 defaultTime: now latestDateAllowed: +1week earliestDateAllowed: -420weeks timestamps: - - regex: /^(?:[^,]*,){6}([^,]+)/ + - regex: /(.*)/ strptime: "%Y/%m/%d %H:%M:%S" description: Use the correct "generated time" as the timestamp for the event. - - id: serde + - id: comment filter: "true" disabled: null conf: - mode: reserialize + comment: >- + The following fields are dropped from the original message: + + * future_use_* - there is no defined usage of these fields + + * *_time - redundant fields. The "generated_time" field is true timestamp of the event and is used for the Auto Timestamp function. 
+ groupId: yicqwn + - id: eval + filter: "true" + disabled: false + conf: + remove: + - future_use* + - "*_time" + keep: + - _time + groupId: yicqwn + description: Remove fields from event where the values should not be serialized + - id: serialize + filter: "true" + disabled: false + conf: type: csv - srcField: _raw + dstField: _raw fields: - future_use1 - receive_time @@ -110,6 +183,21 @@ functions: - dvc_name - dvc_group - audit_comment + description: Serialize PAN OS events to the fields used in the Splunk TA + groupId: yicqwn + - id: eval + filter: "true" + disabled: false + conf: + keep: + - _raw + - _time + - index + - host + - source + - sourcetype remove: - - future_use* - - "*_time" + - "*" + description: Final field cleanup + groupId: yicqwn +description: CONFIG log type diff --git a/default/pipelines/pan_correlation/conf.yml b/default/pipelines/pan_correlation/conf.yml index c6f9f4e..01924d3 100644 --- a/default/pipelines/pan_correlation/conf.yml +++ b/default/pipelines/pan_correlation/conf.yml @@ -1,5 +1,8 @@ output: default -groups: {} +groups: + yicqwn: + name: Reserialization of Events + index: 6 asyncFuncTimeout: 1000 functions: - id: comment @@ -14,22 +17,27 @@ functions: #2. Simple eval to set the host, sourcetype, source, index, and cleanup the _raw message to remove the syslog header - #3. Use the Auto Timestamp function to set the event timestamp to the "generated time" + #3. The parser function extracts all field values to the top level for event processing - #4. Reshape the events using the parser to remove unnecessary fields + #4. If the pan_device_name_as_host Global Variable is set to true, use the dvc_name field as the host value + + #5-6. Use the Auto Timestamp function to set the event timestamp to the "generated time" + + #7-10. 
Reserialization of Events back into CSV dropping fields that are not relevant - id: eval filter: "true" disabled: null conf: add: - name: host - value: _raw.match(/[A-Z][a-z]{2}\s{1,2}\d{1,2}\s\d{2}:\d{2}:\d{2}\s([^\s]+)\s/)[1] || host + value: _raw.match(/[A-Z][a-z]{2}\s{1,2}\d{1,2}\s\d{2}:\d{2}:\d{2}\s([^\s]+)\s/)[1] + || host - name: sourcetype value: "'pan:correlation'" - name: source - value: source || 'pan:syslog' + value: source || C.vars.pan_default_source - name: index - value: index || 'pan_logs' + value: index || C.vars.pan_default_index - name: _raw value: (message || _raw).substring((message || _raw).indexOf(',')) keep: @@ -41,6 +49,44 @@ functions: - sourcetype remove: - "*" + - id: serde + filter: "true" + disabled: null + conf: + mode: extract + type: csv + srcField: _raw + fields: + - future_use1 + - receive_time + - serial_number + - type + - content_threat_type + - future_use2 + - generated_time + - source_address_source_user + - virtual_system + - category + - severity + - device_group_hierarchy_level_1 + - device_group_hierarchy_level_2 + - device_group_hierarchy_level_3 + - device_group_hierarchy_level_4 + - virtual_system_name + - device_name + - virtual_system_id + - object_name + - object_id + - evidence + - id: eval + filter: C.vars.pan_device_name_as_host + disabled: null + conf: + add: + - name: host + value: dvc_name || host + description: If Global Variable is true, set the host field value to the value + from the dvc_name field - id: lookup filter: "true" disabled: null @@ -57,32 +103,55 @@ functions: outFields: - lookupField: tz eventField: __tz - description: Add time zone offset as an internal field to the event. Uses the host - field to look up the value from device_info.csv file. + description: Add time zone offset as an internal field to the event. Uses the + host field to look up the value from device_info.csv file. 
- id: auto_timestamp filter: "true" disabled: null conf: - srcField: _raw + srcField: generated_time dstField: _time defaultTimezone: utc - timeExpression: "__tz ? (time.getTime() / 1000) + (__tz * 3600) : (time.getTime() / 1000)" + timeExpression: "__tz ? C.Time.adjustTZ(time.getTime(), 'UTC', __tz) / 1000 : + time.getTime() / 1000" offset: 0 maxLen: 150 defaultTime: now latestDateAllowed: +1week earliestDateAllowed: -420weeks timestamps: - - regex: /^(?:[^,]*,){6}([^,]+)/ + - regex: /(.*)/ strptime: "%Y/%m/%d %H:%M:%S" description: Use the correct "generated time" as the timestamp for the event. - - id: serde + - id: comment filter: "true" disabled: null conf: - mode: reserialize + comment: >- + The following fields are dropped from the original message: + + * future_use_* - there is no defined usage of these fields + + * *_time, timestamp - redundant fields. The "generated_time" field is the true timestamp of the event and is used for the Auto Timestamp function. + groupId: yicqwn + - id: eval + filter: "true" + disabled: false + conf: + remove: + - future_use* + - "*_time" + - timestamp + keep: + - _time + groupId: yicqwn + description: Remove fields from event where the values should not be serialized + - id: serialize + filter: "true" + disabled: false + conf: type: csv - srcField: _raw + dstField: _raw fields: - future_use1 - receive_time @@ -105,8 +174,21 @@ functions: - object_name - object_id - evidence + description: Serialize PAN OS events to the fields used in the Splunk TA + groupId: yicqwn + - id: eval + filter: "true" + disabled: false + conf: + keep: + - _raw + - _time + - index + - host + - source + - sourcetype remove: - - future_use* - - "*_time" - - timestamp - - device_group_* + - "*" + description: Final field cleanup + groupId: yicqwn +description: CORRELATION log type diff --git a/default/pipelines/pan_decryption/conf.yml b/default/pipelines/pan_decryption/conf.yml index 5c6e708..4d9a4f8 100644 --- a/default/pipelines/pan_decryption/conf.yml +++ 
b/default/pipelines/pan_decryption/conf.yml @@ -1,5 +1,8 @@ output: default -groups: {} +groups: + yicqwn: + name: Reserialization of Events + index: 6 asyncFuncTimeout: 1000 functions: - id: comment @@ -14,24 +17,29 @@ functions: #2. Simple eval to set the host, sourcetype, source, index, and cleanup the _raw message to remove the syslog header - #3-4. Use the Auto Timestamp function to set the event timestamp to the "generated time" + #3. The parser function extracts all field values to the top level for event processing - #5. Reshape the events using the parser to remove unnecessary fields + #4. If the pan_device_name_as_host Global Variable is set to true, use the dvc_name field as the host value + + #5-6. Use the Auto Timestamp function to set the event timestamp to the "generated time" + + #7-10. Reserialization of Events back into CSV dropping fields that are not relevant - id: eval filter: "true" disabled: null conf: add: - name: host - value: _raw.match(/[A-Z][a-z]{2}\s{1,2}\d{1,2}\s\d{2}:\d{2}:\d{2}\s([^\s]+)\s/)[1] || host + value: _raw.match(/[A-Z][a-z]{2}\s{1,2}\d{1,2}\s\d{2}:\d{2}:\d{2}\s([^\s]+)\s/)[1] + || host + - name: _raw + value: (message || _raw).substring((message || _raw).indexOf(',')) - name: sourcetype - value: "'pan:traffic'" + value: "'pan:decryption'" - name: source - value: source || 'pan:syslog' + value: source || C.vars.pan_default_source - name: index - value: index || 'pan_logs' - - name: _raw - value: (message || _raw).substring((message || _raw).indexOf(',')) + value: index || C.vars.pan_default_index keep: - _raw - _time @@ -41,6 +49,130 @@ functions: - sourcetype remove: - "*" + - id: serde + filter: "true" + disabled: null + conf: + mode: extract + type: csv + srcField: _raw + fields: + - future_use1 + - receive_time + - serial_number + - type + - log_subtype + - version + - generated_time + - src_ip + - dest_ip + - src_translated_ip + - dest_translated_ip + - rule + - src_user + - dest_user + - app + - vsys + - src_zone + - 
dest_zone + - src_interface + - dest_interface + - log_forwarding_profile + - start_time + - session_id + - repeat_count + - src_port + - dest_port + - src_translated_port + - dest_translated_port + - flags + - ip_protocol + - action + - tunnel_id + - future_use2 + - future_use3 + - src_vm_uuid + - dest_vm_uuid + - uuid_rule + - stage_client_firewall + - stage_firewall_client + - tls_version + - key_exchange_algorithm + - encryption_algorithm + - hash_algorithm + - policy_name + - elliptic_curve + - error_index + - root_status + - chain_status + - proxy_type + - cert_serial_number + - fingerprint + - cert_start_time + - cert_end_time + - cert_version + - cert_size + - cn_length + - issuer_cn_length + - root_cn_length + - sni_length + - cert_flags + - subject_cn + - issuer_subject_cn + - root_subject_cn + - server_name + - error + - container_id + - pod_namespace + - pod_name + - src_edl + - dest_edl + - src_dag + - dest_dag + - timestamp + - src_dvc_category + - src_dvc_profile + - src_dvc_model + - src_dvc_vendor + - src_dvc_os + - src_dvc_os_version + - src_name + - src_mac + - dest_dvc_category + - dest_dvc_profile + - dest_dvc_model + - dest_dvc_vendor + - dest_dvc_os + - dest_dvc_os_version + - dest_name + - dest_mac + - sequence_number + - action_flags + - devicegroup_level1 + - devicegroup_level2 + - devicegroup_level3 + - devicegroup_level4 + - vsys_name + - dvc_name + - vsys_id + - app_subcategory + - app_category + - app_technology + - app_risk + - app_characteristic + - app_container + - app_tunneled + - app_saas + - app_sanctioned + - id: eval + filter: C.vars.pan_device_name_as_host + disabled: null + conf: + add: + - name: host + value: dvc_name || host + description: If Global Variable is true, set the host field value to the value + from the dvc_name field - id: lookup filter: "true" disabled: null @@ -57,32 +189,55 @@ functions: outFields: - lookupField: tz eventField: __tz - description: Add time zone offset as an internal field to the event. 
Uses the host - field to look up the value from device_info.csv file. + description: Add time zone offset as an internal field to the event. Uses the + host field to look up the value from device_info.csv file. - id: auto_timestamp filter: "true" disabled: null conf: - srcField: _raw + srcField: generated_time dstField: _time defaultTimezone: utc - timeExpression: "__tz ? C.Time.adjustTZ(time.getTime(), 'UTC', __tz) / 1000 : time.getTime() / 1000" + timeExpression: "__tz ? C.Time.adjustTZ(time.getTime(), 'UTC', __tz) / 1000 : + time.getTime() / 1000" offset: 0 maxLen: 150 defaultTime: now latestDateAllowed: +1week earliestDateAllowed: -420weeks timestamps: - - regex: /^(?:[^,]*,){6}([^,]+)/ + - regex: /(.*)/ strptime: "%Y/%m/%d %H:%M:%S" description: Use the correct "generated time" as the timestamp for the event. - - id: serde + - id: comment filter: "true" disabled: null conf: - mode: reserialize + comment: >- + The following fields are dropped from the original message: + + * future_use_* - there is no defined usage of these fields + + * *_time, timestamp - redundant fields. The "generated_time" field is true timestamp of the event and is used for the Auto Timestamp function. 
+ groupId: yicqwn + - id: eval + filter: "true" + disabled: false + conf: + remove: + - future_use* + - "*_time" + - timestamp + keep: + - _time + groupId: yicqwn + description: Remove fields from event where the values should not be serialized + - id: serialize + filter: "true" + disabled: false + conf: type: csv - srcField: _raw + dstField: _raw fields: - future_use1 - receive_time @@ -113,7 +268,7 @@ functions: - src_translated_port - dest_translated_port - flags - - IP_PROTOCOL + - ip_protocol - action - tunnel_id - future_use2 @@ -139,8 +294,8 @@ functions: - cert_end_time - cert_version - cert_size - - cn_lenght - - issure_cn_length + - cn_length + - issuer_cn_length - root_cn_length - sni_length - cert_flags @@ -180,7 +335,7 @@ functions: - devicegroup_level3 - devicegroup_level4 - vsys_name - - device_name + - dvc_name - vsys_id - app_subcategory - app_category @@ -188,13 +343,24 @@ functions: - app_risk - app_characteristic - app_container + - app_tunneled - app_saas - - app_sanctioned + - app_sanctioned + description: Serialize PAN OS events to the fields used in the Splunk TA + groupId: yicqwn + - id: eval + filter: "true" + disabled: false + conf: + keep: + - _raw + - _time + - index + - host + - source + - sourcetype remove: - - future_use* - - "*_time" - - timestamp - - dest_dvc_* - - src_dvc_* - - devicegroup_* - - app_* + - "*" + description: Final field cleanup + groupId: yicqwn +description: DECRYPTION log type diff --git a/default/pipelines/pan_globalprotect/conf.yml b/default/pipelines/pan_globalprotect/conf.yml index e5a51d8..7073d9f 100644 --- a/default/pipelines/pan_globalprotect/conf.yml +++ b/default/pipelines/pan_globalprotect/conf.yml @@ -1,5 +1,8 @@ output: default -groups: {} +groups: + yicqwn: + name: Reserialization of Events + index: 6 asyncFuncTimeout: 1000 functions: - id: comment @@ -14,24 +17,29 @@ functions: #2. Simple eval to set the host, sourcetype, source, index, and cleanup the _raw message to remove the syslog header - #3-4. 
Use the Auto Timestamp function to set the event timestamp to the "generated time" + #3. The parser function extracts all field values to the top level for event processing - #5. Reshape the events using the parser to remove unnecessary fields + #4. If the pan_device_name_as_host Global Variable is set to true, use the dvc_name field as the host value + + #5-6. Use the Auto Timestamp function to set the event timestamp to the "generated time" + + #7-10. Reserialization of Events back into CSV dropping fields that are not relevant - id: eval filter: "true" disabled: null conf: add: - name: host - value: _raw.match(/[A-Z][a-z]{2}\s{1,2}\d{1,2}\s\d{2}:\d{2}:\d{2}\s([^\s]+)\s/)[1] || host + value: _raw.match(/[A-Z][a-z]{2}\s{1,2}\d{1,2}\s\d{2}:\d{2}:\d{2}\s([^\s]+)\s/)[1] + || host + - name: _raw + value: (message || _raw).substring((message || _raw).indexOf(',')) - value: "'pan:globalprotect'" name: sourcetype - name: source - value: source || 'pan:syslog' + value: source || C.vars.pan_default_source - name: index - value: index || 'pan_logs' - - name: _raw - value: (message || _raw).substring((message || _raw).indexOf(',')) + value: index || C.vars.pan_default_index keep: - _raw - _time @@ -41,6 +49,72 @@ functions: - sourcetype remove: - "*" + - id: serde + filter: "true" + disabled: null + conf: + mode: extract + type: csv + srcField: _raw + fields: + - future_use1 + - receive_time + - serial_number + - type + - future_use2 + - version + - generated_time + - vsys + - event_id + - stage + - auth_method + - tunnel_type + - src_user + - src_region + - machine_name + - public_ip + - public_ipv6 + - private_ip + - private_ipv6 + - host_id + - serialnumber + - client_ver + - client_os + - client_os_ver + - repeat_count + - reason + - error + - opaque + - status + - location + - login_duration + - connect_method + - error_code + - portal + - sequence_number + - action_flags + - event_time + - selection_type + - response_time + - priority + - attempted_gateways + - gateway 
+ - devicegroup_level1 + - devicegroup_level2 + - devicegroup_level3 + - devicegroup_level4 + - vsys_name + - dvc_name + - vsys_id + - id: eval + filter: C.vars.pan_device_name_as_host + disabled: null + conf: + add: + - name: host + value: dvc_name || host + description: If Global Variable is true, set the host field value to the value + from the dvc_name field - id: lookup filter: "true" disabled: null @@ -57,32 +131,54 @@ functions: outFields: - lookupField: tz eventField: __tz - description: Add time zone offset as an internal field to the event. Uses the host - field to look up the value from device_info.csv file. + description: Add time zone offset as an internal field to the event. Uses the + host field to look up the value from device_info.csv file. - id: auto_timestamp filter: "true" disabled: null conf: - srcField: _raw + srcField: generated_time dstField: _time defaultTimezone: utc - timeExpression: "__tz ? C.Time.adjustTZ(time.getTime(), 'UTC', __tz) / 1000 : time.getTime() / 1000" + timeExpression: "__tz ? C.Time.adjustTZ(time.getTime(), 'UTC', __tz) / 1000 : + time.getTime() / 1000" offset: 0 maxLen: 150 defaultTime: now latestDateAllowed: +1week earliestDateAllowed: -420weeks timestamps: - - regex: /^(?:[^,]*,){6}([^,]+)/ + - regex: /(.*)/ strptime: "%Y/%m/%d %H:%M:%S" description: Use the correct "generated time" as the timestamp for the event. - - id: serde + - id: comment filter: "true" disabled: null conf: - mode: reserialize + comment: >- + The following fields are dropped from the original message: + + * future_use_* - there is no defined usage of these fields + + * *_time - redundant fields. The "generated_time" field is true timestamp of the event and is used for the Auto Timestamp function. 
+ groupId: yicqwn + - id: eval + filter: "true" + disabled: false + conf: + remove: + - future_use* + - "*_time" + keep: + - _time + groupId: yicqwn + description: Remove fields from event where the values should not be serialized + - id: serialize + filter: "true" + disabled: false + conf: type: csv - srcField: _raw + dstField: _raw fields: - future_use1 - receive_time @@ -90,7 +186,7 @@ functions: - type - future_use2 - version - - time_generated + - generated_time - vsys - event_id - stage @@ -133,7 +229,21 @@ functions: - vsys_name - dvc_name - vsys_id + description: Serialize PAN OS events to the fields used in the Splunk TA + groupId: yicqwn + - id: eval + filter: "true" + disabled: false + conf: + keep: + - _raw + - _time + - index + - host + - source + - sourcetype remove: - - future_use* - - "*_time" - - time_generated + - "*" + description: Final field cleanup + groupId: yicqwn +description: GLOBALPROTECT log type diff --git a/default/pipelines/pan_hipmatch/conf.yml b/default/pipelines/pan_hipmatch/conf.yml index f8a0bce..484180c 100644 --- a/default/pipelines/pan_hipmatch/conf.yml +++ b/default/pipelines/pan_hipmatch/conf.yml @@ -1,5 +1,8 @@ output: default -groups: {} +groups: + yicqwn: + name: Reserialization of Events + index: 6 asyncFuncTimeout: 1000 functions: - id: comment @@ -15,24 +18,29 @@ functions: #2. Simple eval to set the host, sourcetype, source, index, and cleanup the _raw message to remove the syslog header - #3-4. Use the Auto Timestamp function to set the event timestamp to the "generated time" + #3. The parser function extracts all field values to the top level for event processing - #5. Reshape the events using the parser to remove unnecessary fields + #4. If the pan_device_name_as_host Global Variable is set to true, use the dvc_name field as the host value + + #5-6. Use the Auto Timestamp function to set the event timestamp to the "generated time" + + #7-10. 
Reserialization of Events back into CSV dropping fields that are not relevant - id: eval filter: "true" disabled: null conf: add: - name: host - value: _raw.match(/[A-Z][a-z]{2}\s{1,2}\d{1,2}\s\d{2}:\d{2}:\d{2}\s([^\s]+)\s/)[1] || host + value: _raw.match(/[A-Z][a-z]{2}\s{1,2}\d{1,2}\s\d{2}:\d{2}:\d{2}\s([^\s]+)\s/)[1] + || host + - name: _raw + value: (message || _raw).substring((message || _raw).indexOf(',')) - name: sourcetype value: "'pan:hipmatch'" - name: source - value: source || 'pan:syslog' + value: source || C.vars.pan_default_source - name: index - value: index || 'pan_logs' - - name: _raw - value: (message || _raw).substring((message || _raw).indexOf(',')) + value: index || C.vars.pan_default_index keep: - _raw - _time @@ -42,6 +50,54 @@ functions: - sourcetype remove: - "*" + - id: serde + filter: "true" + disabled: null + conf: + mode: extract + type: csv + srcField: _raw + fields: + - future_use1 + - receive_time + - serial_number + - type + - log_subtype + - version + - generated_time + - src_user + - vsys + - host_name + - os + - src_ip + - hip_name + - hip_count + - hip_type + - future_use3 + - future_use4 + - sequence_number + - action_flags + - devicegroup_level1 + - devicegroup_level2 + - devicegroup_level3 + - devicegroup_level4 + - vsys_name + - dvc_name + - vsys_id + - ipv6_system_address + - host_id + - serialnumber + - mac + - high_res_timestamp + - id: eval + filter: C.vars.pan_device_name_as_host + disabled: null + conf: + add: + - name: host + value: dvc_name || host + description: If Global Variable is true, set the host field value to the value + from the dvc_name field - id: lookup filter: "true" disabled: null @@ -58,32 +114,55 @@ functions: outFields: - lookupField: tz eventField: __tz - description: Add time zone offset as an internal field to the event. Uses the host - field to look up the value from device_info.csv file. + description: Add time zone offset as an internal field to the event. 
Uses the + host field to look up the value from device_info.csv file. - id: auto_timestamp filter: "true" disabled: null conf: - srcField: _raw + srcField: generated_time dstField: _time defaultTimezone: utc - timeExpression: "__tz ? C.Time.adjustTZ(time.getTime(), 'UTC', __tz) / 1000 : time.getTime() / 1000" + timeExpression: "__tz ? C.Time.adjustTZ(time.getTime(), 'UTC', __tz) / 1000 : + time.getTime() / 1000" offset: 0 maxLen: 150 defaultTime: now latestDateAllowed: +1week earliestDateAllowed: -420weeks timestamps: - - regex: /^(?:[^,]*,){6}([^,]+)/ + - regex: /(.*)/ strptime: "%Y/%m/%d %H:%M:%S" description: Use the correct "generated time" as the timestamp for the event. - - id: serde + - id: comment filter: "true" disabled: null conf: - mode: reserialize + comment: >- + The following fields are dropped from the original message: + + * future_use_* - there is no defined usage of these fields + + * *_time, high_res_timestamp - redundant fields. The "generated_time" field is true timestamp of the event and is used for the Auto Timestamp function. 
+ groupId: yicqwn + - id: eval + filter: "true" + disabled: false + conf: + remove: + - future_use* + - "*_time" + - high_res_timestamp + keep: + - _time + groupId: yicqwn + description: Remove fields from event where the values should not be serialized + - id: serialize + filter: "true" + disabled: false + conf: type: csv - srcField: _raw + dstField: _raw fields: - future_use1 - receive_time @@ -116,7 +195,21 @@ functions: - serialnumber - mac - high_res_timestamp + description: Serialize PAN OS events to the fields used in the Splunk TA + groupId: yicqwn + - id: eval + filter: "true" + disabled: false + conf: + keep: + - _raw + - _time + - index + - host + - source + - sourcetype remove: - - future_use* - - "*_time" - - high_res_timestamp + - "*" + description: Final field cleanup + groupId: yicqwn +description: HIPMATCH log type diff --git a/default/pipelines/pan_system/conf.yml b/default/pipelines/pan_system/conf.yml index dd6cbd3..b25732a 100644 --- a/default/pipelines/pan_system/conf.yml +++ b/default/pipelines/pan_system/conf.yml @@ -3,7 +3,10 @@ groups: RARnbB: name: Suppress Duplicate Events disabled: true - index: 5 + index: 6 + yicqwn: + name: Reserialization of Events + index: 7 asyncFuncTimeout: 1000 functions: - id: comment @@ -18,26 +21,31 @@ functions: #2. Simple eval to set the host, sourcetype, source, index, and cleanup the _raw message to remove the syslog header - #3-4. Use the Auto Timestamp function to set the event timestamp to the "generated time" + #3. The parser function extracts all field values to the top level for event processing - #5. Reshape the events using the parser to remove unnecessary fields + #4. If the pan_device_name_as_host Global Variable is set to true, use the dvc_name field as the host value - #6-7. Suppress duplicate system event messages + #5-6. Use the Auto Timestamp function to set the event timestamp to the "generated time" + + #7. Suppress duplicate system event messages (optional) + + #8-11. 
Reserialization of Events back into CSV dropping fields that are not relevant - id: eval filter: "true" disabled: null conf: add: - name: host - value: _raw.match(/[A-Z][a-z]{2}\s{1,2}\d{1,2}\s\d{2}:\d{2}:\d{2}\s([^\s]+)\s/)[1] || host + value: _raw.match(/[A-Z][a-z]{2}\s{1,2}\d{1,2}\s\d{2}:\d{2}:\d{2}\s([^\s]+)\s/)[1] + || host - name: _raw value: (message || _raw).substring((message || _raw).indexOf(',')) - name: sourcetype value: "'pan:system'" - name: source - value: source || 'pan:syslog' + value: source || C.vars.pan_default_source - name: index - value: index || 'pan_logs' + value: index || C.vars.pan_default_index keep: - _raw - host @@ -47,48 +55,13 @@ functions: - _time remove: - "*" - description: Set fields to correct values and remove the remainder. Cleanup the _raw - field by removing the syslog header. - - id: lookup - filter: "true" - disabled: null - conf: - matchMode: regex - matchType: specific - reloadPeriodSec: 60 - addToEvent: false - inFields: - - eventField: host - lookupField: host - ignoreCase: false - file: device_info.csv - outFields: - - lookupField: tz - eventField: __tz - description: Add time zone offset as an internal field to the event. Uses the host - field to look up the value from device_info.csv file. - - id: auto_timestamp - filter: "true" - disabled: null - conf: - srcField: _raw - dstField: _time - defaultTimezone: utc - timeExpression: "__tz ? C.Time.adjustTZ(time.getTime(), 'UTC', __tz) / 1000 : time.getTime() / 1000" - offset: 0 - maxLen: 150 - defaultTime: now - latestDateAllowed: +1week - earliestDateAllowed: -420weeks - timestamps: - - regex: /^(?:[^,]*,){6}([^,]+)/ - strptime: "%Y/%m/%d %H:%M:%S" - description: Use the correct "generated time" as the timestamp for the event. + description: Set fields to correct values and remove the remainder. Cleanup the + _raw field by removing the syslog header. 
- id: serde filter: "true" disabled: false conf: - mode: reserialize + mode: extract type: csv srcField: _raw fields: @@ -118,18 +91,93 @@ functions: - future_use5 - future_use6 - high_res_timestamp + - id: eval + filter: C.vars.pan_device_name_as_host + disabled: null + conf: + add: + - name: host + value: dvc_name || host + description: If Global Variable is true, set the host field value to the value + from the dvc_name field + - id: lookup + filter: "true" + disabled: null + conf: + matchMode: regex + matchType: specific + reloadPeriodSec: 60 + addToEvent: false + inFields: + - eventField: host + lookupField: host + ignoreCase: false + file: device_info.csv + outFields: + - lookupField: tz + eventField: __tz + description: Add time zone offset as an internal field to the event. Uses the + host field to look up the value from device_info.csv file. + - id: auto_timestamp + filter: "true" + disabled: null + conf: + srcField: generated_time + dstField: _time + defaultTimezone: utc + timeExpression: "__tz ? C.Time.adjustTZ(time.getTime(), 'UTC', __tz) / 1000 : + time.getTime() / 1000" + offset: 0 + maxLen: 150 + defaultTime: now + latestDateAllowed: +1week + earliestDateAllowed: -420weeks + timestamps: + - regex: /(.*)/ + strptime: "%Y/%m/%d %H:%M:%S" + description: Use the correct "generated time" as the timestamp for the event. + - id: suppress + filter: "true" + disabled: true + conf: + allow: 1 + suppressPeriodSec: 300 + dropEventsMode: true + maxCacheSize: 50000 + cacheIdleTimeoutPeriods: 2 + numEventsIdleTimeoutTrigger: 10000 + keyExpr: "`${__temp.event_id} ${__temp.description}`" + groupId: RARnbB + - id: comment + filter: "true" + disabled: null + conf: + comment: >- + The following fields are dropped from the original message: + + * future_use_* - there is no defined usage of these fields + + * receive_time, generated_time, high_res_timestamp - redundant fields. 
The "generated_time" field is true timestamp of the event and is used for the Auto Timestamp function. + groupId: yicqwn + - id: eval + filter: "true" + disabled: false + conf: remove: - future_use* - receive_time - generated_time - high_res_timestamp - - id: serde + keep: + - _time + groupId: yicqwn + description: Remove fields from event where the values should not be serialized + - id: serialize filter: "true" - disabled: true + disabled: false conf: - mode: extract type: csv - srcField: _raw + dstField: _raw fields: - future_use1 - receive_time @@ -154,17 +202,24 @@ functions: - devicegroup_level4 - vsys_name - dvc_name - dstField: __temp - groupId: RARnbB - - id: suppress + - future_use5 + - future_use6 + - high_res_timestamp + description: Serialize PAN OS events to the fields used in the Splunk TA + groupId: yicqwn + - id: eval filter: "true" - disabled: true + disabled: false conf: - allow: 1 - suppressPeriodSec: 300 - dropEventsMode: true - maxCacheSize: 50000 - cacheIdleTimeoutPeriods: 2 - numEventsIdleTimeoutTrigger: 10000 - keyExpr: "`${__temp.event_id} ${__temp.description}`" - groupId: RARnbB + keep: + - _raw + - _time + - index + - host + - source + - sourcetype + remove: + - "*" + description: Final field cleanup + groupId: yicqwn +description: SYSTEM log type diff --git a/default/pipelines/pan_threat/conf.yml b/default/pipelines/pan_threat/conf.yml index dddee79..5631e46 100644 --- a/default/pipelines/pan_threat/conf.yml +++ b/default/pipelines/pan_threat/conf.yml @@ -1,5 +1,9 @@ output: default -groups: {} +groups: + ZHU77H: + name: Reserialization of Events + disabled: false + index: 6 asyncFuncTimeout: 1000 functions: - id: comment @@ -14,24 +18,29 @@ functions: #2. Simple eval to set the host, sourcetype, source, index, and cleanup the _raw message to remove the syslog header - #3-4. Use the Auto Timestamp function to set the event timestamp to the "generated time" + #3. 
The parser function extracts all field values to the top level for event processing + + #4. If the pan_device_name_as_host Global Variable is set to true, use the dvc_name field as the host value - #5. Reshape the events using the parser to remove unnecessary fields + #5-6. Use the Auto Timestamp function to set the event timestamp to the "generated time" + + #7-11. Reserialization of Events back into CSV dropping fields that are not relevant - id: eval filter: "true" disabled: null conf: add: - name: host - value: _raw.match(/[A-Z][a-z]{2}\s{1,2}\d{1,2}\s\d{2}:\d{2}:\d{2}\s([^\s]+)\s/)[1] || host + value: _raw.match(/[A-Z][a-z]{2}\s{1,2}\d{1,2}\s\d{2}:\d{2}:\d{2}\s([^\s]+)\s/)[1] + || host - name: _raw value: (message || _raw).substring((message || _raw).indexOf(',')) - name: sourcetype value: "'pan:threat'" - name: source - value: source || 'pan:syslog' + value: source || C.vars.pan_default_source - name: index - value: index || 'pan_logs' + value: index || C.vars.pan_default_index remove: - "*" keep: @@ -41,46 +50,11 @@ functions: - source - sourcetype - _time - - id: lookup - filter: "true" - disabled: null - conf: - matchMode: regex - matchType: specific - reloadPeriodSec: 60 - addToEvent: false - inFields: - - eventField: host - lookupField: host - ignoreCase: false - file: device_info.csv - outFields: - - lookupField: tz - eventField: __tz - description: Add time zone offset as an internal field to the event. Uses the host - field to look up the value from device_info.csv file. - - id: auto_timestamp - filter: "true" - disabled: null - conf: - srcField: _raw - dstField: _time - defaultTimezone: utc - timeExpression: "__tz ? 
C.Time.adjustTZ(time.getTime(), 'UTC', __tz) / 1000 : time.getTime() / 1000" - offset: 0 - maxLen: 150 - defaultTime: now - latestDateAllowed: +1week - earliestDateAllowed: -420weeks - timestamps: - - regex: /^(?:[^,]*,){6}([^,]+)/ - strptime: "%Y/%m/%d %H:%M:%S" - description: Use the correct "generated time" as the timestamp for the event. - id: serde filter: "true" disabled: false conf: - mode: reserialize + mode: extract type: csv srcField: _raw fields: @@ -113,7 +87,7 @@ functions: - src_translated_port - dest_translated_port - session_flags - - transport + - ip_protocol - action - misc - threat @@ -191,7 +165,7 @@ functions: - src_dag - dest_dag - partial_hash - - high_res_timestmp + - high_res_timestamp - reason - justification - nssai_sst @@ -201,11 +175,83 @@ functions: - app_risk - app_characteristic - app_container + - app_tunneled - app_saas - app_sanction - - app_tunneled - cloud_report_id keep: [] + remove: [] + - id: eval + filter: C.vars.pan_device_name_as_host + disabled: null + conf: + add: + - name: host + value: dvc_name || host + description: If Global Variable is true, set the host field value to the value + from the dvc_name field + - id: lookup + filter: "true" + disabled: null + conf: + matchMode: regex + matchType: specific + reloadPeriodSec: 60 + addToEvent: false + inFields: + - eventField: host + lookupField: host + ignoreCase: false + file: device_info.csv + outFields: + - lookupField: tz + eventField: __tz + description: Add time zone offset as an internal field to the event. Uses the + host field to look up the value from device_info.csv file. + - id: auto_timestamp + filter: "true" + disabled: null + conf: + srcField: _raw + dstField: _time + defaultTimezone: utc + timeExpression: "__tz ? 
C.Time.adjustTZ(time.getTime(), 'UTC', __tz) / 1000 : + time.getTime() / 1000" + offset: 0 + maxLen: 150 + defaultTime: now + latestDateAllowed: +1week + earliestDateAllowed: -420weeks + timestamps: + - regex: /^(?:[^,]*,){6}([^,]+)/ + strptime: "%Y/%m/%d %H:%M:%S" + description: Use the correct "generated time" as the timestamp for the event. + - id: comment + filter: "true" + disabled: false + conf: + comment: >- + The following fields are dropped from the original message: + + * future_use_* - there is no defined usage of these fields + + * *_time - redundant fields. The "generated_time" field is true timestamp of the event and is used for the Auto Timestamp function. + groupId: ZHU77H + - id: comment + filter: "true" + disabled: false + conf: + comment: >- + The following fields are removed because they are not included in the + field extractions for the Splunk TA: + + + rule_uuid, http2_connection, link_change_count, policy_id, link_switches, sdwan_cluster, sdwan_device_type, sdwan_cluster_type, sdwan_site, dynusergroup_name, xff_ip, src_dvc_category, src_dvc_profile, src_dvc_model, src_dvc_vendor, src_dvc_os_family, src_dvc_os_version, src_dvc_host, src_dvc_mac, dest_dvc_category, dest_dvc_profile, dest_dvc_model, dest_dvc_vendor, dest_dvc_os_family, dest_dvc_os_version, dest_dvc_host, dest_dvc_mac, container_id, pod_namespace, pod_name, src_edl, dest_edl, host_id, dvc_serial_number, src_dag, dest_dag, session_owner, high_res_timestamp, nsdsai_sst, nsdsai_sd, app_subcategory, app_category, app_technology, app_risk, app_characteristic, app_container, app_tunneled, app_saas, app_sanction, offloaded + groupId: ZHU77H + - id: eval + filter: "true" + disabled: false + conf: remove: - receive_time - generated_time @@ -230,9 +276,159 @@ functions: - dvc_serial_number - "*_dag" - partial_hash - - high_res_timestmp + - high_res_timestamp - reason - justification - nssai_sst - app_* - cloud_report_id + keep: + - _time + groupId: ZHU77H + description: Remove fields from 
event where the values should not be serialized
+ - id: serialize
+ filter: "true"
+ disabled: false
+ conf:
+ type: csv
+ dstField: _raw
+ fields:
+ - future_use1
+ - receive_time
+ - serial_number
+ - type
+ - log_subtype
+ - version
+ - generated_time
+ - src_ip
+ - dest_ip
+ - src_translated_ip
+ - dest_translated_ip
+ - rule
+ - src_user
+ - dest_user
+ - app
+ - vsys
+ - src_zone
+ - dest_zone
+ - src_interface
+ - dest_interface
+ - log_forwarding_profile
+ - future_use3
+ - session_id
+ - repeat_count
+ - src_port
+ - dest_port
+ - src_translated_port
+ - dest_translated_port
+ - session_flags
+ - ip_protocol
+ - action
+ - misc
+ - threat
+ - raw_category
+ - severity
+ - direction
+ - sequence_number
+ - action_flags
+ - src_location
+ - dest_location
+ - future_use4
+ - content_type
+ - pcap_id
+ - file_hash
+ - cloud_address
+ - url_index
+ - user_agent
+ - file_type
+ - xff
+ - referrer
+ - sender
+ - subject
+ - recipient
+ - report_id
+ - devicegroup_level1
+ - devicegroup_level2
+ - devicegroup_level3
+ - devicegroup_level4
+ - vsys_name
+ - dvc_name
+ - future_use5
+ - src_vm
+ - dest_vm
+ - http_method
+ - tunnel_id
+ - tunnel_monitor_tag
+ - tunnel_session_id
+ - tunnel_start_time
+ - tunnel_type
+ - threat_category
+ - content_version
+ - future_use6
+ - sctp_assoc_id
+ - payload_protocol_id
+ - http_headers
+ - url_category_list
+ - rule_uuid
+ - http2_connection
+ - dynusergroup_name
+ - xff_ip
+ - src_dvc_category
+ - src_dvc_profile
+ - src_dvc_model
+ - src_dvc_vendor
+ - src_dvc_os_family
+ - src_dvc_os_version
+ - src_dvc_host
+ - src_dvc_mac
+ - dest_dvc_category
+ - dest_dvc_profile
+ - dest_dvc_model
+ - dest_dvc_vendor
+ - dest_dvc_os_family
+ - dest_dvc_os_version
+ - dest_dvc_host
+ - dest_dvc_mac
+ - container_id
+ - pod_namespace
+ - pod_name
+ - src_edl
+ - dest_edl
+ - host_id
+ - dvc_serial_number
+ - domain_edl
+ - src_dag
+ - dest_dag
+ - partial_hash
+ - high_res_timestamp
+ - reason
+ - justification
+ - nssai_sst
+ - 
app_subcategory + - app_category + - app_technology + - app_risk + - app_characteristic + - app_container + - app_tunneled + - app_saas + - app_sanction + - cloud_report_id + groupId: ZHU77H + description: Push event back into CSV format in _raw + - id: eval + filter: "true" + disabled: false + conf: + keep: + - _raw + - _time + - index + - host + - source + - sourcetype + remove: + - "*" + description: Final field cleanup + groupId: ZHU77H +description: THREAT log type diff --git a/default/pipelines/pan_traffic/conf.yml b/default/pipelines/pan_traffic/conf.yml index 1e1468b..6052beb 100644 --- a/default/pipelines/pan_traffic/conf.yml +++ b/default/pipelines/pan_traffic/conf.yml @@ -4,11 +4,14 @@ groups: name: Sampling of Events description: Enable these functions if you want to sample events disabled: true - index: 5 + index: 6 yjDKQN: name: Drop irrelevant logs disabled: true - index: 6 + index: 7 + yicqwn: + name: Reserialization of Events + index: 8 asyncFuncTimeout: 1000 functions: - id: comment @@ -23,28 +26,33 @@ functions: #2. Simple eval to set the host, sourcetype, source, index, and cleanup the _raw message to remove the syslog header - #3-4. Use the Auto Timestamp function to set the event timestamp to the "generated time" + #3. The parser function extracts all field values to the top level for event processing + + #4. If the pan_device_name_as_host Global Variable is set to true, use the dvc_name field as the host value - #5. Reshape the events using the parser to remove unnecessary fields + #5-6. Use the Auto Timestamp function to set the event timestamp to the "generated time" - #6-8. Sample events + #7-8. Sample events (optional) - #9-11. Drop logs with subtype of start + #9-10. Drop logs with subtype of start (optional) + + #11-16. 
Reserialization of Events back into CSV dropping fields that are not relevant - id: eval filter: "true" disabled: null conf: add: - name: host - value: _raw.match(/[A-Z][a-z]{2}\s{1,2}\d{1,2}\s\d{2}:\d{2}:\d{2}\s([^\s]+)\s/)[1] || host + value: _raw.match(/[A-Z][a-z]{2}\s{1,2}\d{1,2}\s\d{2}:\d{2}:\d{2}\s([^\s]+)\s/)[1] + || host - name: _raw value: (message || _raw).substring((message || _raw).indexOf(',')) - name: sourcetype value: "'pan:traffic'" - name: source - value: source || 'pan:syslog' + value: source || C.vars.pan_default_source - name: index - value: index || 'pan_logs' + value: index || C.vars.pan_default_index keep: - _raw - _time @@ -54,48 +62,13 @@ functions: - sourcetype remove: - "*" - description: Set fields to correct values and remove the remainder. Cleanup the _raw - field by removing the syslog header. - - id: lookup - filter: "true" - disabled: null - conf: - matchMode: regex - matchType: specific - reloadPeriodSec: 60 - addToEvent: false - inFields: - - eventField: host - lookupField: host - ignoreCase: false - file: device_info.csv - outFields: - - lookupField: tz - eventField: __tz - description: Add time zone offset as an internal field to the event. Uses the host - field to look up the value from device_info.csv file. - - id: auto_timestamp - filter: "true" - disabled: false - conf: - srcField: _raw - dstField: _time - defaultTimezone: utc - timeExpression: "__tz ? C.Time.adjustTZ(time.getTime(), 'UTC', __tz) / 1000 : time.getTime() / 1000" - offset: 0 - maxLen: 150 - defaultTime: now - latestDateAllowed: +1week - earliestDateAllowed: -420weeks - timestamps: - - regex: /^(?:[^,]*,){6}([^,]+)/ - strptime: "%Y/%m/%d %H:%M:%S" - description: Use the correct "generated time" as the timestamp for the event. + description: Set fields to correct values and remove the remainder. Cleanup the + _raw field by removing the syslog header. 
- id: serde filter: "true" disabled: false conf: - mode: reserialize + mode: extract type: csv srcField: _raw fields: @@ -128,7 +101,7 @@ functions: - src_translated_port - dest_translated_port - session_flags - - transport + - ip_protocol - action - bytes - bytes_out @@ -214,37 +187,60 @@ functions: - app_saas - app_sanction - offloaded - remove: - - future_use* - - "*_time" - - sctp_* - - rule_uuid - - http2_connection - - link_* - - policy_id - - sdwan_* - - dynusergroup_name - - xff_ip - - src_dvc_* - - dest_dvc_* - - container_id - - pod_* - - "*_edl" - - host_id - - dvc_serial_number - - "*_dag" - - session_owner - - high_res_timestamp - - nsdsai_* - - app_* - - offloaded + remove: [] keep: [] + description: Extract field values to the top level of the event. + - id: eval + filter: C.vars.pan_device_name_as_host + disabled: null + conf: + add: + - name: host + value: dvc_name || host + description: If Global Variable is true, set the host field value to the value + from the dvc_name field + - id: lookup + filter: "true" + disabled: null + conf: + matchMode: regex + matchType: specific + reloadPeriodSec: 60 + addToEvent: false + inFields: + - eventField: host + lookupField: host + ignoreCase: false + file: device_info.csv + outFields: + - lookupField: tz + eventField: __tz + description: Add time zone offset as an internal field to the event. Uses the + host field to look up the value from device_info.csv file. + - id: auto_timestamp + filter: "true" + disabled: false + conf: + srcField: generated_time + dstField: _time + defaultTimezone: utc + timeExpression: "__tz ? C.Time.adjustTZ(time.getTime(), 'UTC', __tz) / 1000 : + time.getTime() / 1000" + offset: 0 + maxLen: 150 + defaultTime: now + latestDateAllowed: +1week + earliestDateAllowed: -420weeks + timestamps: + - regex: /(.*)/ + strptime: "%Y/%m/%d %H:%M:%S" + description: Use the correct "generated time" as the timestamp for the event. 
- id: comment filter: "true" disabled: true conf: comment: >- - Enable these functions if you want to sample events. + Enable this function if you want to sample events. Default sampling rules include: @@ -253,39 +249,91 @@ functions: * 10:1 of traffic from trusted zone to trusted zone with action "allowed" groupId: PwGgHd - - id: regex_extract - filter: "true" - disabled: true - conf: - source: _raw - iterations: 100 - overwrite: false - regex: /(?:[^,]*,){4}(?<__log_subtype>[^,]*),(?:[^,]*,){11}(?<__src_zone>[^,]*),(?<__dest_zone>[^,]*),(?:[^,]*,){11}(?<__transport>[^,]*),(?<__action>[^,]*),(?:[^,]*,){2}(?<__bytes_in>[^,]*)/ - groupId: PwGgHd - id: sampling filter: "true" disabled: true conf: rules: - - filter: __bytes_in==0 + - filter: bytes_in==0 rate: 5 - - filter: __src_zone=='trusted' && __dest_zone=='trusted' && __action=='allow' + - filter: src_zone=='trusted' && dest_zone=='trusted' && action=='allow' rate: 10 groupId: PwGgHd - id: comment filter: "true" disabled: true conf: - comment: Enable these functions you want to drop logs with the subtype equal to + comment: Enable this function you want to drop logs with the subtype equal to 'start'. groupId: yjDKQN - - id: serde - filter: "true" + - id: drop + filter: log_subtype=='start' disabled: true + conf: {} + final: true + groupId: yjDKQN + - id: comment + filter: "true" + disabled: null + conf: + comment: >- + The following fields are dropped from the original message: + + * future_use_* - there is no defined usage of these fields + + * *_time - redundant fields. The "generated_time" field is true timestamp of the event and is used for the Auto Timestamp function. 
+ groupId: yicqwn
+ - id: comment
+ filter: "true"
+ disabled: null
+ conf:
+ comment: >-
+ The following fields are removed because they are not included in the
+ field extractions for the Splunk TA:
+
+
+ rule_uuid, http2_connection, link_change_count, policy_id, link_switches, sdwan_cluster, sdwan_device_type, sdwan_cluster_type, sdwan_site, dynusergroup_name, xff_ip, src_dvc_category, src_dvc_profile, src_dvc_model, src_dvc_vendor, src_dvc_os_family, src_dvc_os_version, src_dvc_host, src_dvc_mac, dest_dvc_category, dest_dvc_profile, dest_dvc_model, dest_dvc_vendor, dest_dvc_os_family, dest_dvc_os_version, dest_dvc_host, dest_dvc_mac, container_id, pod_namespace, pod_name, src_edl, dest_edl, host_id, dvc_serial_number, src_dag, dest_dag, session_owner, high_res_timestamp, nsdsai_sst, nsdsai_sd, app_subcategory, app_category, app_technology, app_risk, app_characteristic, app_container, app_tunneled, app_saas, app_sanction, offloaded
+ groupId: yicqwn
+ - id: eval
+ filter: "true"
+ disabled: false
+ conf:
+ remove:
+ - future_use*
+ - generated_time
+ - receive_time
+ - sctp_*
+ - rule_uuid
+ - http2_connection
+ - link_*
+ - policy_id
+ - sdwan_*
+ - dynusergroup_name
+ - xff_ip
+ - src_dvc_*
+ - dest_dvc_*
+ - container_id
+ - pod_*
+ - "*_edl"
+ - host_id
+ - dvc_serial_number
+ - "*_dag"
+ - session_owner
+ - high_res_timestamp
+ - nsdsai_*
+ - app_*
+ - offloaded
+ - tunnel_start_time
+ keep:
+ - _time
+ groupId: yicqwn
+ description: Remove fields from event where the values should not be serialized
+ - id: serialize
+ filter: "true"
+ disabled: false
 conf:
- mode: extract
 type: csv
- srcField: _raw
+ dstField: _raw
 fields:
 - future_use1
 - receive_time
@@ -316,7 +364,7 @@ functions:
 - src_translated_port
 - dest_translated_port
 - session_flags
- - transport
+ - ip_protocol
 - action
 - bytes
 - bytes_out
@@ -402,13 +450,21 @@ functions:
 - app_saas
 - app_sanction
 - offloaded
- remove: []
- keep: []
- dstField: __parsed
- groupId: yjDKQN
- - id: drop
- filter: __parsed.log_subtype=='start'
- disabled: true
- conf: {}
- final: true
- groupId: yjDKQN
+ description: Serialize PAN OS events to the fields used in the Splunk TA
+ groupId: yicqwn
+ - id: eval
+ filter: "true"
+ disabled: false
+ conf:
+ keep:
+ - _raw
+ - _time
+ - index
+ - host
+ - source
+ - sourcetype
+ remove:
+ - "*"
+ description: Final field cleanup
+ groupId: yicqwn
+description: TRAFFIC log type
diff --git a/default/pipelines/pan_userid/conf.yml b/default/pipelines/pan_userid/conf.yml
index 58a159b..6d8611e 100644
--- a/default/pipelines/pan_userid/conf.yml
+++ b/default/pipelines/pan_userid/conf.yml
@@ -1,5 +1,8 @@
 output: default
-groups: {}
+groups:
+ yicqwn:
+ name: Reserialization of Events
+ index: 6
 asyncFuncTimeout: 1000
 functions:
 - id: comment
@@ -14,24 +17,29 @@ functions:
 #2. Simple eval to set the host, sourcetype, source, index, and cleanup the _raw message to remove the syslog header
- #3-4. Use the Auto Timestamp function to set the event timestamp to the "generated time"
+ #3. The parser function extracts all field values to the top level for event processing
- #5. Reshape the events using the parser to remove unnecessary fields
+ #4. If the pan_device_name_as_host Global Variable is set to true, use the dvc_name field as the host value
+
+ #5-6. Use the Auto Timestamp function to set the event timestamp to the "generated time"
+
+ #7-10. 
Reserialization of Events back into CSV dropping fields that are not relevant - id: eval filter: "true" disabled: null conf: add: - - value: _raw.match(/[A-Z][a-z]{2}\s{1,2}\d{1,2}\s\d{2}:\d{2}:\d{2}\s([^\s]+)\s/)[1] || host + - value: _raw.match(/[A-Z][a-z]{2}\s{1,2}\d{1,2}\s\d{2}:\d{2}:\d{2}\s([^\s]+)\s/)[1] + || host name: host + - name: _raw + value: (message || _raw).substring((message || _raw).indexOf(',')) - name: sourcetype value: "'pan:userid'" - name: source - value: source || 'pan:syslog' + value: source || C.vars.pan_default_source - name: index - value: index || 'pan_logs' - - name: _raw - value: (message || _raw).substring((message || _raw).indexOf(',')) + value: index || C.vars.pan_default_index keep: - _raw - _time @@ -41,6 +49,56 @@ functions: - sourcetype remove: - "*" + - id: serde + filter: "true" + disabled: null + conf: + mode: extract + type: csv + srcField: _raw + fields: + - future_use1 + - receive_time + - serial_number + - type + - log_subtype + - version + - generated_time + - vsys + - src_ip + - user + - datasource_name + - event_id + - repeat_count + - timeout_threshold + - src_port + - dest_port + - datasource + - datasource_type + - sequence_number + - action_flags + - devicegroup_level1 + - devicegroup_level2 + - devicegroup_level3 + - devicegroup_level4 + - vsys_name + - dvc_name + - vsys_id + - factor_type + - factor_completion_time + - factor_number + - ugflags + - userbysource + - high_res_timestamp + - id: eval + filter: C.vars.pan_device_name_as_host + disabled: null + conf: + add: + - name: host + value: " dvc_name || host" + description: If Global Variable is true, set the host field value to the value + from the dvc_name field - id: lookup filter: "true" disabled: null @@ -57,32 +115,55 @@ functions: outFields: - lookupField: tz eventField: __tz - description: Add time zone offset as an internal field to the event. Uses the host - field to look up the value from device_info.csv file. 
+ description: Add time zone offset as an internal field to the event. Uses the + host field to look up the value from device_info.csv file. - id: auto_timestamp filter: "true" disabled: null conf: - srcField: _raw + srcField: generated_time dstField: _time defaultTimezone: utc - timeExpression: "__tz ? C.Time.adjustTZ(time.getTime(), 'UTC', __tz) / 1000 : time.getTime() / 1000" + timeExpression: "__tz ? C.Time.adjustTZ(time.getTime(), 'UTC', __tz) / 1000 : + time.getTime() / 1000" offset: 0 maxLen: 150 defaultTime: now latestDateAllowed: +1week earliestDateAllowed: -420weeks timestamps: - - regex: /^(?:[^,]*,){6}([^,]+)/ + - regex: /(.*)/ strptime: "%Y/%m/%d %H:%M:%S" description: Use the correct "generated time" as the timestamp for the event. - - id: serde + - id: comment filter: "true" disabled: null conf: - mode: reserialize + comment: >- + The following fields are dropped from the original message: + + * future_use_* - there is no defined usage of these fields + + * *_time, high_res_timestamp - redundant fields. The "generated_time" field is true timestamp of the event and is used for the Auto Timestamp function. 
+ groupId: yicqwn + - id: eval + filter: "true" + disabled: false + conf: + remove: + - future_use* + - "*_time" + - high_res_timestamp + keep: + - _time + groupId: yicqwn + description: Remove fields from event where the values should not be serialized + - id: serialize + filter: "true" + disabled: false + conf: type: csv - srcField: _raw + dstField: _raw fields: - future_use1 - receive_time @@ -93,14 +174,15 @@ functions: - generated_time - vsys - src_ip - - source_name + - user + - datasource_name - event_id - repeat_count - timeout_threshold - src_port - dest_port - - source - - source_type + - datasource + - datasource_type - sequence_number - action_flags - devicegroup_level1 @@ -113,12 +195,24 @@ functions: - factor_type - factor_completion_time - factor_number - - future_use2 - - future_use3 - ugflags - userbysource - high_res_timestamp + description: Serialize PAN OS events to the fields used in the Splunk TA + groupId: yicqwn + - id: eval + filter: "true" + disabled: false + conf: + keep: + - _raw + - _time + - index + - host + - source + - sourcetype remove: - - future_use* - - "*_time" - - high_res_timestamp + - "*" + description: Final field cleanup + groupId: yicqwn +description: USERID log type diff --git a/default/pipelines/route.yml b/default/pipelines/route.yml index 0a85105..42e3f0b 100644 --- a/default/pipelines/route.yml +++ b/default/pipelines/route.yml @@ -79,7 +79,7 @@ routes: disabled: false pipeline: pan_correlation description: "" - clones: [ ] + clones: [] enableOutputExpression: false outputExpression: null filter: _raw.indexOf(",CORRELATION,") > -1 diff --git a/default/vars.yml b/default/vars.yml new file mode 100644 index 0000000..c320774 --- /dev/null +++ b/default/vars.yml @@ -0,0 +1,19 @@ +pan_default_index: + type: string + lib: custom + value: "'pan_logs'" + description: The default index field value set in this Pack's pipelines + tags: pan +pan_default_source: + type: string + lib: custom + description: The default source field 
value set in this Pack's pipelines + value: "'pan:syslog'" + tags: pan +pan_device_name_as_host: + type: boolean + lib: custom + value: "false" + description: If true, sets the host field to the value of the "device_name" field in the + event instead of the host value from the syslog header + tags: pan diff --git a/package.json b/package.json index 433af94..7992e26 100644 --- a/package.json +++ b/package.json @@ -1 +1 @@ -{"name":"cribl-palo-alto-networks","version":"1.0.0","author":"Brendan Dalpe - Cribl","description":"Process, reduce, and transform Palo Alto Networks Firewall logs.","displayName":"Palo Alto Networks","tags":{"dataType":["logs"],"useCase":["reduction","filtering"],"technology":["paloalto"]}} \ No newline at end of file +{"name":"cribl-palo-alto-networks","version":"1.1.0","author":"Brendan Dalpe - Cribl","description":"Process, reduce, and transform Palo Alto Networks Firewall logs.","displayName":"Palo Alto Networks","tags":{"dataType":["logs"],"useCase":["reduction","filtering"],"technology":["paloalto"]}} \ No newline at end of file diff --git a/test/Makefile b/test/Makefile new file mode 100644 index 0000000..5def3d5 --- /dev/null +++ b/test/Makefile @@ -0,0 +1,8 @@ +test: + python3 -m unittest test_suite + +docker: + docker-compose up -d + +install: + pip3 install -r requirements.txt diff --git a/test/README.md b/test/README.md new file mode 100644 index 0000000..83f0a4e --- /dev/null +++ b/test/README.md @@ -0,0 +1,17 @@ +# Test Suite for Cribl PAN Pack + +## About + +Python tooling built to test syslog input against pipelines inside Cribl Stream. You can find tests for pipelines inside `test_suite.py`. + +Requires `python3` and `pip3` to be installed. 
+ +## Setup +```bash +make install && make docker +``` + +## Run Tests +```bash +make test +``` diff --git a/test/cribl_stream.py b/test/cribl_stream.py new file mode 100644 index 0000000..b4878fb --- /dev/null +++ b/test/cribl_stream.py @@ -0,0 +1,174 @@ +import os +import ndjson +import requests +import logging +import uuid +import tarfile +import io +from json import JSONDecodeError +from typing import Union +from pathlib import Path + + +logger = logging.getLogger(__name__) + + +class CriblStream: + def __init__(self, host, username, password, port=9000): + self.host = host + self.username = username + self.password = password + self.port = port + + self.token = self.get_token() + + def _call(self, method: str, endpoint: str, pack: str = None, payload: dict = None, data: bytes = None, + headers: dict = None, params: dict = None, files: dict = None, authenticated: bool = True): + url = f"http://{self.host}:{self.port}/api/v1{'/p/' + pack if pack else ''}{endpoint}" + + hdr = headers or {} + + if authenticated: + hdr.update({"authorization": f"Bearer {self.token}"}) + + # shortcut for having requests.get, requests.post, request.delete, etc. 
+ response = getattr(requests, method)(url, headers=hdr, params=params, files=files, data=data, json=payload) + + try: + return response.json() + except JSONDecodeError: + # Live capture gets returned as NDJSON + return response.json(cls=ndjson.Decoder) + except Exception: + return response.content + + def get_token(self): + payload = { + "username": self.username, + "password": self.password + } + + response = self._call("post", "/auth/login", payload=payload, authenticated=False) + + return response["token"] + + def enable_syslog_input(self): + config = self._call("get", "/system/inputs/in_syslog") + + config = list(filter(lambda item: item['id'] == 'in_syslog', config['items']))[0] + + config['disabled'] = False + + self._call("patch", "/system/inputs/in_syslog", payload=config) + + def capture_sample(self, duration=3, max_events=10): + payload = { + "filter": "__inputId.startsWith('syslog:in_syslog')", + "duration": duration, + "maxEvents": max_events, + "level": "0" + } + + response = self._call("post", "/system/capture", payload=payload) + + # Wrap response in array if bare object + if isinstance(response, dict): + return [response] + + return response + + def save_sample(self, name: str, sample: [dict]): + payload = { + "sampleName": name, + "context": { + "events": sample + } + } + + response = self._call("post", "/system/samples", payload=payload) + + return response['items'][0]['id'] + + def delete_sample(self, sample_id: str): + samples = self._call("get", f"/system/samples/{sample_id}") + self._call("delete", f"/system/samples/{sample_id}", payload=samples['items'][0]) + + def delete_all_samples(self): + samples = self._call("get", "/system/samples") + samples = list(filter(lambda item: 'isTemplate' not in item, samples["items"])) + + for s in samples: + self._call("delete", f"/system/samples/{s['id']}", payload=s) + + def run_pipeline(self, pipeline: str, sample: str, pack: str = None): + payload = { + "mode": "pipe", + "pipelineId": pipeline, + 
"level": 3, + "sampleId": sample, + "dropped": True, + "cpuProfile": False, + "timeout": 10000, + "memory": 2048 + } + + response = self._call("post", "/preview", pack=pack, payload=payload) + + return response['items'] + + @staticmethod + def create_pack_tarball(): + """ + Creates an in-memory gzipped tarball containing the pack contents. + + :return: Byte array of the tarball + """ + def pack_filter(tarinfo): + # Filter out junk directories to be excluded from the pack + if os.path.basename(tarinfo.name) in ['.git', '.github', 'tests', 'venv', '.DS_Store', '.idea', 'test']: + return None + + # Reset user information + tarinfo.uid = tarinfo.gid = 0 + tarinfo.uname = tarinfo.gname = "root" + + return tarinfo + + file = io.BytesIO() + with tarfile.open(fileobj=file, mode="w:gz") as tar: + parent = Path(__file__).parent.parent + tar.add(parent, filter=pack_filter, arcname='') + + file.seek(0) + return file.read() + + def install_pack(self, file: Union[str, bytes]): + response = None + + # If file is a string, it should be a path on disk to tarball + if isinstance(file, str): + qs = { + 'filename': os.path.basename(file), + 'size': os.stat(file).st_size + } + + with open(file, 'rb') as f: + response = self._call("put", "/packs", params=qs, data=f.read()) + + # If file is a byte array, then generate a random file name, upload, and install + elif isinstance(file, bytes): + qs = { + 'filename': f"{str(uuid.uuid4())}.crbl", + 'size': len(file) + } + + response = self._call("put", "/packs", params=qs, data=file) + + if response: + self._call("post", "/packs", payload=response) + + def delete_pack(self, name: str): + response = self._call("get", f"/packs/{name}") + + if response: + self._call("delete", f"/packs/{name}", payload=response) diff --git a/test/docker-compose.yml b/test/docker-compose.yml new file mode 100644 index 0000000..e28e664 --- /dev/null +++ b/test/docker-compose.yml @@ -0,0 +1,9 @@ +version: '3' + +services: + cribl: + image: 
import socket


class Facility:
    """Syslog facility codes (RFC 3164 numbering)."""
    KERN, USER, MAIL, DAEMON, AUTH, SYSLOG, LPR, NEWS, UUCP, CRON, AUTHPRIV, FTP = range(12)
    LOCAL0, LOCAL1, LOCAL2, LOCAL3, LOCAL4, LOCAL5, LOCAL6, LOCAL7 = range(16, 24)


class Level:
    """Syslog severity levels (RFC 3164 numbering)."""
    EMERG, ALERT, CRIT, ERR, WARNING, NOTICE, INFO, DEBUG = range(8)


class PROTOCOL:
    """Transport selector for Syslog.send / Syslog.send_raw."""
    UDP, TCP = range(2)


class Syslog:
    """A syslog client that logs to a remote server.

    Example:
        >>> log = Syslog(host="foobar.example")
        >>> log.send("hello", Level.WARNING)
    """

    def __init__(self, host="localhost", port=514, facility=Facility.LOCAL0):
        self.host = host
        self.port = port
        self.facility = facility
        # Connectionless UDP socket, reused for every datagram.
        self._udp_socket = socket.socket(socket.AF_INET, socket.SOCK_DGRAM)
        # Kept for interface compatibility with close(); TCP sends now use a
        # fresh connected socket per message (see _send_tcp).
        self._tcp_socket = socket.socket(socket.AF_INET, socket.SOCK_STREAM)

    def close(self):
        """Release both sockets; the client cannot be reused afterwards."""
        self._udp_socket.close()
        self._tcp_socket.close()

    def _send_tcp(self, data):
        # BUGFIX: the original called sendto() on an *unconnected* SOCK_STREAM
        # socket (which raises) and then closed the shared socket, so every
        # subsequent TCP send failed on a closed socket. Open, connect, send,
        # and tear down one connection per message instead.
        with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s:
            s.connect((self.host, self.port))
            s.sendall(data.encode('ascii'))

    def _send_udp(self, data):
        # BUGFIX: the original closed self._tcp_socket after every UDP send
        # (copy/paste error); a UDP datagram needs no teardown at all.
        self._udp_socket.sendto(data.encode('ascii'), (self.host, self.port))

    def send(self, message, level=Level.INFO, protocol=PROTOCOL.UDP):
        """Send a message with a syslog priority header: <facility*8+level>message."""
        data = "<%d>%s" % (level + self.facility * 8, message)

        if protocol == PROTOCOL.UDP:
            self._send_udp(data)
        else:
            self._send_tcp(data)

    def send_raw(self, message, protocol=PROTOCOL.UDP):
        """Send a message verbatim, with no priority header prepended."""
        data = "%s" % message

        if protocol == PROTOCOL.UDP:
            self._send_udp(data)
        else:
            self._send_tcp(data)

    def info(self, message):
        self.send(message, Level.INFO)

    def warn(self, message):
        self.send(message, Level.WARNING)

    def notice(self, message):
        self.send(message, Level.NOTICE)

    def error(self, message):
        self.send(message, Level.ERR)
# Each class below feeds one canonical PAN log line through the pack's route
# (pipeline "pack:<PACK>") and asserts that the output event carries the
# expected `index` and `sourcetype` fields. The assertion idiom
# `assertEqual(result[0], result[0] | {...})` (dict union, Python 3.9+) checks
# that the given key/value pairs are already present in the event — a
# subset assertion, not a full-equality one.
# NOTE(review): the PIPELINE class attribute appears unused — go() is always
# called with f"pack:{PACK}" — presumably kept as documentation of which
# pack pipeline handles the event; confirm before removing.


class PanThreat(SyslogTest):
    # THREAT (vulnerability) log in syslog RFC 5424 framing from Cortex log forwarding.
    PIPELINE = 'pan_threat'

    def test_assert_sourcetype(self):
        result = self.go(f"pack:{PACK}", "1 2022-10-11T17:19:35.782Z stream-logfwd20-718e7c5f--10111016-3fbk-harness-xb0d logforwarder - panwlogs - 2022-10-11T17:14:30.000000Z,no-serial,THREAT,vulnerability,10.0,2022-10-11T17:14:09.000000Z,10.50.123.2,10.240.120.16,,,Mobile Users to PNA-SNA Trust,xy\\alf,,ldap,vsys1,trust,inter-fw,tunnel.1,tunnel.4006,Cortex Logging,844334,1,53498,389,0,0,tcp,alert,,Microsoft Windows NTLMSSP Detection(92322),Informational,client to server,50288852,10.0.0.0-10.255.255.255,10.0.0.0-10.255.255.255,0,,,0,,,,,0,131,0,0,0,,GP cloud service,,,0,,0,1970-01-01T00:00:00.000000Z,N/A,info-leak,565386699,0x0,fd02920c-3450-4e49-b3dd-01b02f9f9cdb,0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,0,2022-10-11T17:14:10.583000Z,", None)
        self.assertEqual(result[0], result[0] | {'index': "pan_logs", 'sourcetype': 'pan:threat'})


class PanSystem(SyslogTest):
    # SYSTEM log (routing daemon configuration event).
    PIPELINE = 'pan_system'

    def test_assert_sourcetype(self):
        result = self.go(f"pack:{PACK}", "1,2021/07/20 23:59:02,1234567890,SYSTEM,routing,0,2021/07/20 23:59:02,,routed-config-p1-success,,0,0,general,informational,Route daemon configuration load phase-1 succeeded.,0,0x0", None)
        self.assertEqual(result[0], result[0] | {'index': "pan_logs", 'sourcetype': 'pan:system'})


class PanConfig(SyslogTest):
    # CONFIG log (admin commit via the web UI).
    PIPELINE = 'pan_config'

    def test_assert_sourcetype(self):
        result = self.go(f"pack:{PACK}", "1,2021/07/20 23:59:02,1234567890,CONFIG,0,0,2021/07/20 00:53:40,192.168.0.1,,commit,admin,Web,Submitted,,0,0x0", None)
        self.assertEqual(result[0], result[0] | {'index': "pan_logs", 'sourcetype': 'pan:config'})


class PanHipMatch(SyslogTest):
    # HIPMATCH log (GlobalProtect host-information-profile match).
    PIPELINE = 'pan_hipmatch'

    def test_assert_sourcetype(self):
        result = self.go(f"pack:{PACK}", "1,2021/07/20 23:59:02,1234567890,HIPMATCH,0,2049,2021/07/20 23:59:02,xx.xx,vsys1,xx-xxxxx-MB,Mac,10.252.31.187,GP-HIP,1,profile,0,0,1052623,0x0,17,11,12,0,,xxxxx,1,0.0.0.0,", None)
        self.assertEqual(result[0], result[0] | {'index': "pan_logs", 'sourcetype': 'pan:hipmatch'})


class PanUserId(SyslogTest):
    # USERID log (login mapping event); fixture deliberately contains
    # embedded quotes and backslashes to exercise the parser.
    PIPELINE = 'pan_userid'

    def test_assert_sourcetype(self):
        result = self.go(f"pack:{PACK}", '1,2020-10-13T01:23:50.000000Z,007051000113358,USERID,login,10.0,2020-10-13T01:23:34.000000Z,vsys1,::c28:7141:ffff:0,"xxxxx\\xxxxx o"xxxxxxxxxx"\'"xxxxxxxxxx"test",fake-data-source-95,1694498816,16777216,-1694302208,63502,60246,server_session_monitor,exchange_server,551324,-9223372036854775808,0,0,0,0,,PA-VM,1,xxxxx,2050-04-13T10:41:35.000000Z,1,64,xxxxxxxxxxxxxx,,2020-10-13T01:23:35.350000Z', None)
        self.assertEqual(result[0], result[0] | {'index': "pan_logs", 'sourcetype': 'pan:userid'})


class PanGlobalProtect(SyslogTest):
    # GLOBALPROTECT log (gateway-switch-to-ssl event with a quoted,
    # comma-separated gateway-priority sub-field).
    PIPELINE = 'pan_globalprotect'

    def test_assert_sourcetype(self):
        result = self.go(f"pack:{PACK}", '1,2020-10-13T01:22:32.000000Z,007051000113358,GLOBALPROTECT,globalprotect,10.0,2020-10-13T01:22:06.000000Z,vsys1,gateway-switch-to-ssl,before-login,SAML,ipsec,xxxxx\\xxxxx xxxxx,FI,machine_name3,xxx.xx.x.xx,::c307:39c8:ffff:0,xxx.xx.x.xx,::f32b:d251:ffff:0,67:11:5a:e2:d2:32,serialno_list-1,66567,Intel Mac OS,9.3.5,16777216,Admin,,opaque_list-0,success,San Francisco,1,connect_method_list-2,0,portal_list-2,557533,-9223372036854775808,2020-10-13T01:22:07.388000Z,select_type-0,50055,medium,"gateway-5,925,1;gateway-4,196,2;gateway-5,583,1;gateway-4,996,5;gateway-1,442,2;gateway-6,121,4;gateway-0,16,1;gateway-6,173,0;gateway-2,753,0;gateway-6,651,0;gateway-3,602,3;gateway-1,55,0;gateway-1,384,2;gateway-4,871,3;gateway-3,546,5;",', None)
        self.assertEqual(result[0], result[0] | {'index': "pan_logs", 'sourcetype': 'pan:globalprotect'})


class PanDecryption(SyslogTest):
    # DECRYPTION log (TLS1.3 session metadata).
    PIPELINE = 'pan_decryption'

    def test_assert_sourcetype(self):
        result = self.go(f"pack:{PACK}", '1,2020-10-13T01:11:23.000000Z,007051000113358,,DECRYPTION,10.0,2020-10-13T01:11:05.000000Z,xxx.xx.x.xx,xxx.xx.x.xx,xxx.xx.x.xx,xxx.xx.x.xx,deny-attackers,00000000000000000000ffff05050505,paloaltonetwork\\xxxxx,mcafee-endpoint-encryption,vsys1,ethernet4Zone-test3,datacenter,,,rs-logging,2020-10-13T01:11:05.000000Z,999250,1,28790,18368,31621,27853,3072,tcp,allow,GRE,,,,,85c1488d-5bbd-42e7-8f28-a19256972c32,unknown,unknown,TLS1.3,ECDHE,AES_128_GCM,SHA256,,sect409k1,None,Untrusted,Uninspected,Broker,14ff0117d825393ebcad2bbfb94bc282da926a7a,6263d82e0ec3d57c209151526dc1240cc19ec2e685fbae4c81f394e9819a7699,1602551466,1605143466,V2,192,23,32,32,21,64,CN = MGMT-GROUP-MGMT-CA,CN = Thawte Premium Server CA1,CN = Thawte Premium Server CA1,devop-host.panw.local,,1873cc5c-0d31,pns_default,pan-dp-77754f4,,,,,2020-10-13T01:11:06.359000Z,H-Phone,h-profile,Pro,Huawei,Mate 10,Android v6.1,pan-411,264754728121,H-Phone,h-profile,ANE-LX3,Huawei,P20 Lite,Android v7.1,pan-431,496310767571,111291,-9223372036854775808', None)
        self.assertEqual(result[0], result[0] | {'index': "pan_logs", 'sourcetype': 'pan:decryption'})


class PanCorrelation(SyslogTest):
    # CORRELATION log (compromised-host beacon-heuristics match).
    PIPELINE = 'pan_correlation'

    def test_assert_sourcetype(self):
        result = self.go(f"pack:{PACK}", '1,2021/07/20 23:59:02,012345678902,CORRELATION,,,2021/07/20 23:59:02,1.2.3.4,username,,compromised-host,medium,0,0,0,0,,us2,,beacon-heuristics,6005,"Host visited known malware URL (100 times)."', None)
        self.assertEqual(result[0], result[0] | {'index': "pan_logs", 'sourcetype': 'pan:correlation'})