fix failures in logging 5.1 #1972

Merged (1 commit) on Apr 26, 2021
fix failure in logging 5.1
QiaolingTang committed Apr 26, 2021
commit 7a2ad7a3d7480683341be13eff767573f4b5c816
1 change: 1 addition & 0 deletions features/logging/OWNERS
@@ -3,3 +3,4 @@ approvers:
reviewers:
- anpingli
- QiaolingTang
- gkarager
6 changes: 3 additions & 3 deletions features/logging/elasticsearch.feature
@@ -37,9 +37,9 @@ Feature: Elasticsearch related tests
"""
Given I obtain test data file "logging/clusterlogging/clusterlogging-storage-template.yaml"
When I process and create:
| f | clusterlogging-storage-template.yaml |
| p | STORAGE_CLASS=<%= cb.default_sc.name %> |
| p | PVC_SIZE=10Gi |
| f | clusterlogging-storage-template.yaml |
| p | STORAGE_CLASS=<%= cb.default_sc.name %> |
| p | PVC_SIZE=10Gi |
Then the step should succeed
Given I wait for the "instance" clusterloggings to appear
And the expression should be true> cluster_logging('instance').logstore_storage_class_name == cb.default_sc.name
194 changes: 117 additions & 77 deletions features/logging/logging_acceptance.feature
@@ -4,49 +4,50 @@ Feature: Logging smoke test case
# @case_id OCP-37508
@admin
Scenario: One logging acceptance case for all cluster
# Deploy cluster-logging operator via web console
Given logging channel name is stored in the :logging_channel clipboard
Given logging service is removed successfully
Given "elasticsearch-operator" packagemanifest's catalog source name is stored in the :eo_opsrc clipboard
Given "cluster-logging" packagemanifest's catalog source name is stored in the :clo_opsrc clipboard
Given I switch to the first user
Given the first user is cluster-admin
Given I open admin console in a browser
# subscribe cluster-logging-operator
When I perform the :goto_operator_subscription_page web action with:
| package_name | cluster-logging |
| catalog_name | <%= cb.clo_opsrc %> |
| target_namespace | openshift-logging |
Then the step should succeed
And I perform the :set_custom_channel_and_subscribe web action with:
| update_channel | <%= cb.logging_channel %> |
| install_mode | OwnNamespace |
| approval_strategy | Automatic |
Given cluster logging operator is ready
# Deploy cluster-logging operator via web console
Given logging channel name is stored in the :logging_channel clipboard
Given logging service is removed successfully
Given "elasticsearch-operator" packagemanifest's catalog source name is stored in the :eo_opsrc clipboard
Given "cluster-logging" packagemanifest's catalog source name is stored in the :clo_opsrc clipboard
Given I switch to the first user
Given the first user is cluster-admin
And evaluation of `user.cached_tokens.first` is stored in the :user_token_1 clipboard
Given I open admin console in a browser
# subscribe cluster-logging-operator
When I perform the :goto_operator_subscription_page web action with:
| package_name | cluster-logging |
| catalog_name | <%= cb.clo_opsrc %> |
| target_namespace | openshift-logging |
Then the step should succeed
And I perform the :set_custom_channel_and_subscribe web action with:
| update_channel | <%= cb.logging_channel %> |
| install_mode | OwnNamespace |
| approval_strategy | Automatic |
Given cluster logging operator is ready
# subscribe elasticsearch-operator
When I perform the :goto_operator_subscription_page web action with:
| package_name | elasticsearch-operator |
| catalog_name | <%= cb.eo_opsrc %> |
| target_namespace | openshift-operators-redhat |
Then the step should succeed
When I perform the :set_custom_channel_and_subscribe web action with:
| update_channel | <%= cb.logging_channel %> |
| install_mode | AllNamespace |
| approval_strategy | Automatic |
When I perform the :goto_operator_subscription_page web action with:
| package_name | elasticsearch-operator |
| catalog_name | <%= cb.eo_opsrc %> |
| target_namespace | openshift-operators-redhat |
Then the step should succeed
When I perform the :set_custom_channel_and_subscribe web action with:
| update_channel | <%= cb.logging_channel %> |
| install_mode | AllNamespace |
| approval_strategy | Automatic |
Then the step should succeed
Given elasticsearch operator is ready in the "openshift-operators-redhat" namespace
Then I use the "openshift-logging" project
And default storageclass is stored in the :default_sc clipboard
Given I obtain test data file "logging/clusterlogging/clusterlogging-storage-template.yaml"
Given I obtain test data file "logging/clusterlogging/cl-storage-with-im-template.yaml"
When I process and create:
| f | clusterlogging-storage-template.yaml |
| f | cl-storage-with-im-template.yaml |
| p | STORAGE_CLASS=<%= cb.default_sc.name %> |
| p | PVC_SIZE=10Gi |
| p | ES_NODE_COUNT=1 |
| p | REDUNDANCY_POLICY=ZeroRedundancy |
Then the step should succeed
Given I wait for the "instance" clusterloggings to appear
# Console Dashboard
Given I wait for the "instance" clusterloggings to appear
# Console Dashboard
When I run the :goto_monitoring_db_cluster_logging web action
Then the step should succeed
Given evaluation of `["Elastic Cluster Status", "Elastic Nodes", "Elastic Shards", "Elastic Documents", "Total Index Size on Disk", "Elastic Pending Tasks", "Elastic JVM GC time", "Elastic JVM GC Rate", "Elastic Query/Fetch Latency | Sum", "Elastic Query Rate | Top 5", "CPU", "Elastic JVM Heap Used", "Elasticsearch Disk Usage", "File Descriptors In Use", "FluentD emit count", "FluentD Buffer Availability", "Elastic rx bytes", "Elastic Index Failure Rate", "FluentD Output Error Rate"]` is stored in the :cards clipboard
@@ -57,7 +58,7 @@ Feature: Logging smoke test case
Then the step should succeed
"""
And I close the current browser
# ES Metrics
# ES Metrics
Given I wait for the "monitor-elasticsearch-cluster" service_monitor to appear
And the expression should be true> service_monitor('monitor-elasticsearch-cluster').service_monitor_endpoint_spec(server_name: "elasticsearch-metrics.openshift-logging.svc").port == "elasticsearch"
And the expression should be true> service_monitor('monitor-elasticsearch-cluster').service_monitor_endpoint_spec(server_name: "elasticsearch-metrics.openshift-logging.svc").path == "/_prometheus/metrics"
@@ -69,9 +70,9 @@ Feature: Logging smoke test case
Then the step should succeed
And the expression should be true> @result[:parsed]['data']['result'][0]['value']
"""
# Fluentd Metrics
# Fluentd Metrics
Given I use the "openshift-logging" project
Given I wait for the "fluentd" service_monitor to appear
And I wait for the "fluentd" service_monitor to appear
Given the expression should be true> service_monitor('fluentd').service_monitor_endpoint_spec(server_name: "fluentd.openshift-logging.svc").port == "metrics"
And the expression should be true> service_monitor('fluentd').service_monitor_endpoint_spec(server_name: "fluentd.openshift-logging.svc").path == "/metrics"
Given I wait up to 360 seconds for the steps to pass:
@@ -82,72 +83,111 @@ Feature: Logging smoke test case
Then the step should succeed
And the expression should be true> @result[:parsed]['data']['result'][0]['value']
"""
# Kibana Access
Given I switch to the first user
# Kibana Access
Given I switch to the second user
Given I create a project with non-leading digit name
Given evaluation of `project` is stored in the :proj clipboard
Given I obtain test data file "logging/loggen/container_json_log_template.json"
And evaluation of `project` is stored in the :proj clipboard
And I obtain test data file "logging/loggen/container_json_log_template.json"
When I run the :new_app client command with:
| file | container_json_log_template.json |
Then the step should succeed
Given a pod becomes ready with labels:
| run=centos-logtest,test=centos-logtest |
Given I switch to the second user
And the second user is cluster-admin
Given I switch to cluster admin pseudo user
And I use the "openshift-logging" project
Given I wait for the "app" index to appear in the ES pod with labels "es-node-master=true"
Given I wait for the "infra" index to appear in the ES pod with labels "es-node-master=true"
And I wait for the project "<%= cb.proj.name %>" logs to appear in the ES pod
And I wait for the project "<%= cb.proj.name %>" logs to appear in the ES pod
Given I switch to the second user
And evaluation of `user.cached_tokens.first` is stored in the :user_token_2 clipboard
When I login to kibana logging web console
Then the step should succeed
And I close the current browser
# Data Check
# Authorization
Given I switch to the second user
And the second user is cluster-admin
Given evaluation of `user.cached_tokens.first` is stored in the :user_token clipboard
# Data Check
# Authorization
Given I switch to cluster admin pseudo user
And I use the "openshift-logging" project
And I wait for the project "<%= cb.proj.name %>" logs to appear in the ES pod
# cluster-admin user
When I perform the HTTP request on the ES pod with labels "es-node-master=true":
| relative_url | infra*/_count?format=JSON |
| op | GET |
| token | <%= cb.user_token_1 %> |
Then the step should succeed
And the expression should be true> @result[:parsed]['count'] > 0
When I perform the HTTP request on the ES pod with labels "es-node-master=true":
| relative_url | app*/_count?format=JSON |
| op | GET |
| token | <%= cb.user_token_1 %> |
Then the step should succeed
And the expression should be true> @result[:parsed]['count'] > 0
# normal user
When I perform the HTTP request on the ES pod with labels "es-node-master=true":
| relative_url | app*/_count?format=JSON' -d '{"query": {"match": {"kubernetes.namespace_name": "<%= cb.proj.name %>"}}} |
| op | GET |
| token | <%= cb.user_token %> |
| token | <%= cb.user_token_2 %> |
Then the step should succeed
And the expression should be true> @result[:parsed]['count'] > 0
When I perform the HTTP request on the ES pod with labels "es-node-master=true":
| relative_url | infra*/_count?format=JSON |
| op | GET |
| token | <%= cb.user_token %> |
| token | <%= cb.user_token_2 %> |
Then the step should succeed
And the expression should be true> @result[:parsed]['count'] > 0
And the expression should be true> [401, 403].include? @result[:exitstatus]
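The authorization checks above issue `_count` queries against Elasticsearch with different bearer tokens: a cluster-admin token can read `infra*`, while a regular user only sees `app*` documents from its own projects and gets 401/403 on `infra*`. A minimal Ruby sketch of building such a token-authenticated request (the host and token values are illustrative placeholders, not taken from the test environment):

```ruby
require 'net/http'
require 'uri'

# Build an Elasticsearch _count request authenticated with a bearer token.
# Host and token are hypothetical examples for illustration only.
def es_count_request(index_pattern, token, host: "elasticsearch.openshift-logging.svc")
  uri = URI("https://#{host}:9200/#{index_pattern}/_count?format=JSON")
  req = Net::HTTP::Get.new(uri)
  req['Authorization'] = "Bearer #{token}"
  req
end

req = es_count_request("infra*", "sha256~example-token")
puts req['Authorization']  # => Bearer sha256~example-token
puts req.uri.path          # => /infra*/_count
```

Whether the response is a document count or a 401/403 then depends entirely on the RBAC bound to the token, which is exactly what the two query blocks above assert.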
# Cronjob
# check whether the cronjobs can delete/rollover indices
When I perform the HTTP request on the ES pod with labels "es-node-master=true":
| relative_url | app*/_count?format=JSON |
| op | GET |
| token | <%= cb.user_token %> |
| relative_url | _cat/indices?format=JSON |
| op | GET |
Then the step should succeed
And the expression should be true> @result[:parsed]['count'] > 0
# Cronjob
Given the expression should be true> cluster_logging('instance').management_state == "Managed"
And the expression should be true> elasticsearch('elasticsearch').management_state == "Managed"
Given evaluation of `cron_job('curator').schedule` is stored in the :curator_schedule_1 clipboard
Then the expression should be true> cb.curator_schedule_1 == cluster_logging('instance').curation_schedule
When I run the :patch client command with:
| resource | clusterlogging |
| resource_name | instance |
| p | {"spec": {"curation": {"curator": {"schedule": "*/15 * * * *"}}}} |
| type | merge |
Then the step should succeed
And the expression should be true> cluster_logging('instance').curation_schedule == "*/15 * * * *"
And I wait up to 180 seconds for the steps to pass:
And evaluation of `@result[:parsed].select {|e| e['index'].start_with? "app"}.map {|x| x["index"]}` is stored in the :app_indices clipboard
And evaluation of `@result[:parsed].select {|e| e['index'].start_with? "infra"}.map {|x| x["index"]}` is stored in the :infra_indices clipboard
And evaluation of `@result[:parsed].select {|e| e['index'].start_with? "audit"}.map {|x| x["index"]}` is stored in the :audit_indices clipboard
Given I register clean-up steps:
"""
Given I successfully merge patch resource "clusterlogging/instance" with:
| {"spec": {"logStore": {"retentionPolicy": {"application": {"maxAge": "6h"}, "audit": {"maxAge": "1w"}, "infra": {"maxAge": "3h"}}}}} |
And I successfully merge patch resource "elasticsearch/elasticsearch" with:
| {"spec": {"managementState": "Managed"}} |
"""
Given the expression should be true> cron_job('curator').schedule(cached: false, quiet: true) == "*/15 * * * *"
Given I successfully merge patch resource "clusterlogging/instance" with:
| {"spec": {"logStore": {"retentionPolicy": {"application": {"maxAge": "6m"}, "audit": {"maxAge": "6m"}, "infra": {"maxAge": "6m"}}}}} |
And I wait up to 60 seconds for the steps to pass:
"""
When I run the :patch client command with:
| resource | cronjob |
| resource_name | curator |
| p | {"spec": {"schedule": "*/20 * * * *" }} |
Then the step should succeed
And I wait up to 180 seconds for the steps to pass:
""""
Given the expression should be true> cron_job('curator').schedule(cached: false, quiet: true) == "*/15 * * * *"
Given the expression should be true> elasticsearch("elasticsearch").delete_min_age(cached: false, name: "app-policy") == "6m"
And the expression should be true> elasticsearch("elasticsearch").delete_min_age(name: "infra-policy") == "6m"
And the expression should be true> elasticsearch("elasticsearch").delete_min_age(name: "audit-policy") == "6m"
"""
Given I successfully merge patch resource "elasticsearch/elasticsearch" with:
| {"spec": {"managementState": "Unmanaged"}} |
And the expression should be true> elasticsearch("elasticsearch").management_state == "Unmanaged"
Given evaluation of `["elasticsearch-im-app", "elasticsearch-im-audit", "elasticsearch-im-infra"]` is stored in the :cj_names clipboard
And I repeat the following steps for each :cj_name in cb.cj_names:
"""
Given I successfully merge patch resource "cronjob/#{cb.cj_name}" with:
| {"spec": {"schedule": "*/3 * * * *"}} |
And the expression should be true> cron_job('#{cb.cj_name}').schedule(cached: false) == "*/3 * * * *"
"""
# check that new indices have been created and that old indices can be deleted
# !(cb.new_app_indices - cb.app_indices).empty? ensures a new index was created
# !(cb.app_indices - cb.new_app_indices).empty? ensures some old indices were deleted
Given I check the cronjob status
Then the step should succeed
Given I wait up to 660 seconds for the steps to pass:
"""
When I perform the HTTP request on the ES pod with labels "es-node-master=true":
| relative_url | _cat/indices?format=JSON |
| op | GET |
Then the step should succeed
Given evaluation of `@result[:parsed].select {|e| e['index'].start_with? "app"}.map {|x| x["index"]}` is stored in the :new_app_indices clipboard
And evaluation of `@result[:parsed].select {|e| e['index'].start_with? "infra"}.map {|x| x["index"]}` is stored in the :new_infra_indices clipboard
And evaluation of `@result[:parsed].select {|e| e['index'].start_with? "audit"}.map {|x| x["index"]}` is stored in the :new_audit_indices clipboard
Then the expression should be true> !(cb.new_app_indices - cb.app_indices).empty? && !(cb.app_indices - cb.new_app_indices).empty?
And the expression should be true> !(cb.new_infra_indices - cb.infra_indices).empty? && !(cb.infra_indices - cb.new_infra_indices).empty?
And the expression should be true> !(cb.new_audit_indices - cb.audit_indices).empty? && !(cb.audit_indices - cb.new_audit_indices).empty?
"""
1 change: 1 addition & 0 deletions features/step_definitions/logging.rb
@@ -49,6 +49,7 @@
raise "Error creating operatorgroup" unless @result[:success]
end
#TODO: starting from logging 5.1 there is no need to create the role and rolebinding; these resources are created by the OLM operator
unless role_binding('prometheus-k8s').exists?
# create RBAC object in `openshift-operators-redhat` namespace
operator_group_yaml ||= "#{BushSlicer::HOME}/testdata/logging/eleasticsearch/deploy_via_olm/03_eo-rbac.yaml"
56 changes: 56 additions & 0 deletions testdata/logging/clusterlogging/cl-storage-with-im-template.yaml
@@ -0,0 +1,56 @@
kind: Template
apiVersion: v1
metadata:
name: clusterlogging-template
annotations:
description: "Deploy clusterlogging with PVC"
tags: "cluster-logging"
objects:
- kind: "ClusterLogging"
apiVersion: "logging.openshift.io/v1"
metadata:
name: "instance"
namespace: openshift-logging
spec:
managementState: "Managed"
logStore:
type: "elasticsearch"
retentionPolicy:
application:
maxAge: 60h
infra:
maxAge: 3h
audit:
maxAge: 1d
elasticsearch:
nodeCount: ${{ES_NODE_COUNT}}
resources:
requests:
cpu: 100m
memory: 1Gi
storage:
storageClassName: "${STORAGE_CLASS}"
size: "${PVC_SIZE}"
redundancyPolicy: "${REDUNDANCY_POLICY}"
visualization:
type: "kibana"
kibana:
replicas: 1
curation:
type: "curator"
curator:
schedule: "*/5 * * * *"
collection:
logs:
type: "fluentd"
fluentd: {}
parameters:
- name: STORAGE_CLASS
value: "gp2"
- name: PVC_SIZE
value: "10Gi"
- name: ES_NODE_COUNT
value: "1"
- name: REDUNDANCY_POLICY
value: "ZeroRedundancy"