Commit

add new perfInsights.py script to analyse the data generated in result_data branch (#905)

Signed-off-by: Kamesh Akella <kamesh.asp@gmail.com>
Signed-off-by: Alexander Schwartz <aschwart@redhat.com>
Co-authored-by: Alexander Schwartz <aschwart@redhat.com>
kami619 and ahus1 authored Aug 9, 2024
1 parent ffde4b4 commit c7aae9f
Showing 6 changed files with 197 additions and 1 deletion.
5 changes: 5 additions & 0 deletions .gitignore
@@ -9,6 +9,11 @@ gatling-charts-*
# Grafana report pdfs
benchmark/src/main/python/grafana_report_pdfs

# Python files
benchmark/src/main/python/venv
benchmark/src/main/python/results
benchmark/src/main/python/perf_insights.log

# Intellij
###################
.idea
3 changes: 3 additions & 0 deletions benchmark/src/main/python/README.adoc
@@ -0,0 +1,3 @@
To find out more about the Grafana dashboard-to-PDF script `snapGrafana.py`, visit https://www.keycloak.org/keycloak-benchmark/kubernetes-guide/latest/util/grafana#snapgrafana-py-cli-options

To find out more about the performance analysis script `perfInsights.py`, visit https://www.keycloak.org/keycloak-benchmark/kubernetes-guide/latest/util/perf-insights
140 changes: 140 additions & 0 deletions benchmark/src/main/python/perfInsights.py
@@ -0,0 +1,140 @@
import argparse
import json
import logging

import pandas as pd
import requests
from pandas import json_normalize

def setup_logger(log_file):
    # Set up logging to a file
    logging.basicConfig(filename=log_file, level=logging.DEBUG,
                        format='%(asctime)s %(levelname)s %(message)s')
    logger = logging.getLogger()
    return logger

def fetch_and_process_json(github_user, github_repo, branch_name, json_directory, logger):
    # GitHub API URL to list files in the directory on a specific branch
    api_url = f'https://api.github.com/repos/{github_user}/{github_repo}/contents/{json_directory}?ref={branch_name}'

    # Fetch the list of files in the directory
    response = requests.get(api_url)
    response.raise_for_status()  # fail fast on HTTP errors such as API rate limits
    files = response.json()

    # Dictionary to store DataFrames for each test
    data_frames = {
        'memoryUsageTest': [],
        'cpuUsageForLoginsTest': [],
        'cpuUsageForCredentialGrantsTest': []
    }

    basic_df = []

    # Fetch each JSON file and append to the corresponding list
    for file in files:
        if file['name'].endswith('.json'):
            file_url = file['download_url']
            file_response = requests.get(file_url)
            file_json = file_response.json()
            df = pd.json_normalize(file_json)
            basic_df.append(df)

            # Debug: log the JSON content
            logger.debug("Processing file: %s", file['name'])
            logger.debug("JSON content: %s", json.dumps(file_json, indent=2))

            # Normalize the JSON to extract specific fields for each test
            for test in data_frames.keys():
                if test in file_json:
                    df = json_normalize(
                        file_json,
                        record_path=[test, 'statistics'],
                        meta=[
                            'start',
                            'context',
                            [test, 'activeSessionsPer500MbPerPod'],
                            [test, 'userLoginsPerSecPer1vCpuPerPod'],
                            [test, 'credentialGrantsPerSecPer1vCpu']
                        ],
                        record_prefix=f'{test}.',
                        errors='ignore'
                    )
                    data_frames[test].append(df)

    combined_df = pd.concat(basic_df, ignore_index=True)
    # pd.json_normalize flattens nested keys with '.', so the credential-grants
    # metric appears under its test's prefix in the combined DataFrame
    perf_across_deployments_df = combined_df[['start', 'context.externalInfinispanFeatureEnabled', 'context.persistentSessionsEnabled', 'cpuUsageForLoginsTest.userLoginsPerSecPer1vCpuPerPod', 'cpuUsageForCredentialGrantsTest.credentialGrantsPerSecPer1vCpu', 'memoryUsageTest.activeSessionsPer500MbPerPod']]

    print(perf_across_deployments_df.to_csv(index=False))
    # Concatenate all DataFrames for each test into a single DataFrame
    combined_data_frames = {test: pd.concat(data_frames[test], ignore_index=True) for test in data_frames}

    # Log the columns of the combined DataFrames
    for test, df in combined_data_frames.items():
        logger.debug(f"{test} DataFrame columns: {df.columns.tolist()}")
        logger.debug(f"{test} DataFrame sample: {df.head()}")

    return combined_data_frames

def save_to_csv(data_frames, json_directory, output_directory):
    # Columns to include in the final CSVs for each test
    columns_to_include = {
        'memoryUsageTest': [
            'start',
            'context',
            'memoryUsageTest.name',
            'memoryUsageTest.activeSessionsPer500MbPerPod',
            'memoryUsageTest.meanResponseTime.total',
            'memoryUsageTest.percentiles1.total',
            'memoryUsageTest.meanNumberOfRequestsPerSecond.total'
        ],
        'cpuUsageForLoginsTest': [
            'start',
            'context',
            'cpuUsageForLoginsTest.name',
            'cpuUsageForLoginsTest.userLoginsPerSecPer1vCpuPerPod',
            'cpuUsageForLoginsTest.meanResponseTime.total',
            'cpuUsageForLoginsTest.percentiles1.total',
            'cpuUsageForLoginsTest.meanNumberOfRequestsPerSecond.total'
        ],
        'cpuUsageForCredentialGrantsTest': [
            'start',
            'context',
            'cpuUsageForCredentialGrantsTest.name',
            'cpuUsageForCredentialGrantsTest.credentialGrantsPerSecPer1vCpu',
            'cpuUsageForCredentialGrantsTest.meanResponseTime.total',
            'cpuUsageForCredentialGrantsTest.percentiles1.total',
            'cpuUsageForCredentialGrantsTest.meanNumberOfRequestsPerSecond.total'
        ]
    }

    for test, df in data_frames.items():
        # Keep only the desired columns, in order, skipping any that are missing
        available_columns = [col for col in columns_to_include[test] if col in df.columns]
        df = df[available_columns]

        test_date = json_directory.replace("/", "_")
        # Save to CSV
        csv_file_path = f"{output_directory}/{test_date}_{test}_results.csv"
        df.to_csv(csv_file_path, index=False)
        print(f"Saved {test} results to {csv_file_path}")

def main():
    parser = argparse.ArgumentParser(description="Process JSON files from a GitHub repository.")
    parser.add_argument('json_directory', type=str, help='The directory in the GitHub repository containing JSON files.')
    parser.add_argument('output_directory', type=str, help='The directory to save the CSV files.')
    args = parser.parse_args()

    github_user = 'keycloak'
    github_repo = 'keycloak-benchmark'
    branch_name = 'result-data'
    json_directory = args.json_directory
    output_directory = args.output_directory

    # Set up logger
    log_file = 'perf_insights.log'
    logger = setup_logger(log_file)

    data_frames = fetch_and_process_json(github_user, github_repo, branch_name, json_directory, logger)
    save_to_csv(data_frames, json_directory, output_directory)

if __name__ == '__main__':
    main()
4 changes: 3 additions & 1 deletion benchmark/src/main/python/requirements.txt
@@ -1,4 +1,6 @@
-playwright==1.37.0
+pandas==2.2.2
+playwright==1.45.1
+Requests==2.32.3
 asyncio==3.4.3
 typing==3.7.4.3
 typing_extensions==4.7.1
45 changes: 45 additions & 0 deletions doc/kubernetes/modules/ROOT/pages/util/perf-insights.adoc
@@ -0,0 +1,45 @@
= Analyzing the Continuous Performance Test results
:description: A utility to perform custom analysis on the generated results from continuous performance tests.

{description}

== Continuous Performance Tests
The link:{github-files}/.github/workflows/rosa-cluster-auto-provision-on-schedule.yml[ROSA Daily Scheduled Run workflow] is an automated process that runs every weekday to provide continuous performance testing.

The workflow starts by deploying a multi-AZ cluster with features such as external Infinispan and persistent sessions enabled. It then executes a series of functional tests to verify the system's performance and stability in an active-active configuration.

Following these tests, a scaling benchmark assesses the system's ability to handle varying loads, providing insights into performance under real-world conditions. The results are then persisted to the https://github.com/keycloak/keycloak-benchmark/tree/result-data/rosa_scalability[result-data branch] of the benchmark's GitHub repository for further analysis.

This automated schedule ensures consistent testing, early detection of potential issues, and continuous improvement of {project_name}'s performance.
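
The stored results can be listed directly with the GitHub contents API; the sketch below mirrors the query `perfInsights.py` issues internally, using one example directory from the branch.

[source,python]
----
# A minimal sketch: list the result JSON files for one day via the GitHub
# contents API, mirroring the query perfInsights.py issues internally.
# The directory path is one example taken from the result-data branch.
import requests

api_url = ('https://api.github.com/repos/keycloak/keycloak-benchmark'
           '/contents/rosa_scalability/2024/07/23?ref=result-data')
response = requests.get(api_url)
response.raise_for_status()

for entry in response.json():
    if entry['name'].endswith('.json'):
        print(entry['name'], entry['download_url'])
----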

== Analyze the results

We provide a Python script, `link:{github-files}/benchmark/src/main/python/perfInsights.py[perfInsights.py]`, to analyze the results generated by the continuous performance tests described above.

=== Installing the required Python libraries

[source,bash]
----
pip3 install -U -r requirements.txt
----

We can then verify that all requirements are satisfied using the following command:

[source,bash]
----
python3 -m pip check
----

=== Usage

Run the command below to fetch the results from the https://github.com/keycloak/keycloak-benchmark/tree/result-data/rosa_scalability[result-data branch] and save the analysis to a local directory.

[source,bash]
----
python3 perfInsights.py <result_json_dir_path_on_github> <output_dir_path>
----

For example:

[source,bash]
----
python3 perfInsights.py rosa_scalability/2024/07/23 results
----
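
The script writes one CSV file per test into the output directory, following the pattern `<json_directory with slashes replaced by underscores>_<test>_results.csv`. As a minimal sketch, assuming the example invocation above was run, the files can be loaded back with pandas for further analysis:

[source,python]
----
# A minimal sketch, assuming the example invocation above was run:
# load the per-test CSVs that perfInsights.py wrote to the `results` directory.
import pandas as pd

tests = ['memoryUsageTest', 'cpuUsageForLoginsTest', 'cpuUsageForCredentialGrantsTest']
for test in tests:
    df = pd.read_csv(f'results/rosa_scalability_2024_07_23_{test}_results.csv')
    print(test, df.shape)  # rows and columns available for each test
----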
1 change: 1 addition & 0 deletions doc/kubernetes/modules/ROOT/partials/util-nav.adoc
@@ -1,5 +1,6 @@
** xref:util/sqlpad.adoc[]
** xref:util/grafana.adoc[]
** xref:util/perf-insights.adoc[]
** xref:util/prometheus.adoc[]
** xref:util/otel.adoc[]
** xref:util/debugging-keycloak.adoc[]
