-
Notifications
You must be signed in to change notification settings - Fork 1
/
checkpy_plot.py
184 lines (156 loc) · 6.1 KB
/
checkpy_plot.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
import os
import re
import sqlite3
from datetime import datetime
import pandas as pd
from bokeh.plotting import figure, output_file, save, show
from bokeh.transform import factor_cmap
from bokeh.models import HoverTool, ColumnDataSource
from bokeh.palettes import Spectral6
from textwrap import wrap
from bokeh.models import Legend, LegendItem
# Directory containing the log files
log_directory = '/Users/dokigbo/Downloads/vso_health_summer_project/vso_health_logs_python'
# Patterns to match lines containing error/warning messages and source name
error_warning_pattern = re.compile(r'(FAILED|WARNING|ERROR).*', re.IGNORECASE)
source_name_pattern = re.compile(r'Query:\s*([^\|]+?)\s*\|\s*([^\|]+?)\s*\|\s*([^\|]+?)\s*between', re.IGNORECASE) #looks for source_names between Query: and between. Not all source_names were seperated by 3 commAS
#Some had dashes and underscores
# Connect to the SQLite database
conn = sqlite3.connect('vso_files.db')
cur = conn.cursor()
# Create or replace the log_entries_python table
cur.execute('''
CREATE TABLE IF NOT EXISTS log_entries_python (
id INTEGER PRIMARY KEY AUTOINCREMENT,
log_file TEXT NOT NULL,
log_entry TEXT NOT NULL,
entry_date TEXT NOT NULL,
source_name TEXT,
error_message TEXT
)
''')
# Function to parse log files and insert into the database
def parse_log_files(directory):
for filename in os.listdir(directory):
if filename.endswith(".log"):
file_path = os.path.join(directory, filename)
date_part = filename.split('_')[2] # Extract date from filename
try:
entry_date = datetime.strptime(date_part, '%Y%m%d').strftime('%Y-%m-%d')
except ValueError:
print(f"Filename {filename} contains an invalid date format.")
continue
with open(file_path, 'r') as file:
for line in file:
source_match = source_name_pattern.search(line)
if source_match:
provider, source, instrument = source_match.groups()
source_name = f"{provider.strip()}-{source.strip()}-{instrument.strip()}"
error_message = line.strip()
cur.execute('''
INSERT INTO log_entries_python (log_file, log_entry, entry_date, source_name, error_message)
VALUES (?, ?, ?, ?, ?)
''', (filename, line.strip(), entry_date, source_name, error_message))
# Parse the log files and insert the data
parse_log_files(log_directory)
conn.commit()
# Query to get data for the Bokeh plot
#Want to see nore or less dates remove the limit. Or increase the numbers
query = '''
WITH DistinctDates AS (
SELECT DISTINCT check_date
FROM check_files_python
ORDER BY check_date DESC
LIMIT 30
)
SELECT
c.source_name,
c.check_date,
c.status,
l.error_message
FROM check_files_python c
LEFT JOIN log_entries_python l
ON c.source_name = l.source_name AND c.check_date = l.entry_date
WHERE c.check_date IN (SELECT check_date FROM DistinctDates)
ORDER BY c.check_date
'''
df = pd.read_sql_query(query, conn)
conn.close()
# Group by 'source_name', 'check_date', and 'status', then concatenate unique error messages
df_grouped = df.groupby(['source_name', 'check_date', 'status'])['error_message'].apply(
lambda x: "<br>".join(wrap("\n".join(x.dropna().unique()), 40))
).reset_index()
# Add custom tooltip messages based on status
def get_tooltip_message(status, error_message):
if status == 2:
return "Status is 2 which means it's skipped and therefore no message"
return error_message
df_grouped['tooltip_message'] = df_grouped.apply(lambda row: get_tooltip_message(row['status'], row['error_message']), axis=1)
# Prepare the data for Bokeh
df_grouped['check_date'] = pd.to_datetime(df_grouped['check_date'])
df_grouped['status_str'] = df_grouped['status'].astype(str)
# Create a color mapper
status_list = df_grouped['status_str'].unique().tolist()
color_map = {
'1': 'green', # Pass or known query
'0': '#32CD32', # Pass
'9': 'red', # Fail no response (no data)
'8': 'orange', # Fail on download
'2': 'yellow' # Skipped
}
# Prepare the data for Bokeh
df_grouped['color'] = df_grouped['status_str'].map(color_map)
source = ColumnDataSource(df_grouped)
# Create the figure
p = figure(
x_axis_type='datetime',
x_axis_label='Check Date',
y_range=sorted(df_grouped['source_name'].unique().tolist(), reverse=True), #Reverse= True puts tge Y axis in alphabetical Order, may have to remove it when you run it
y_axis_label='Source Name',
title='Python Health Check Status Over Time',
height=4000,
width=1200,
tools="pan,wheel_zoom,box_zoom,reset"
)
# Add circle glyphs to the plot
circle = p.circle(
x='check_date',
y='source_name',
size=10,
source=source,
color='color',
legend_field='status_str'
)
# Add HoverTool with custom tooltips
hover = HoverTool(
tooltips="""
<div>
<div><strong>Date:</strong> @check_date{%F}</div>
<div><strong>Source Name:</strong> @source_name</div>
<div><strong>Status:</strong> @status</div>
<div><strong>Message:</strong> @tooltip_message</div>
</div>
""",
formatters={
'@check_date': 'datetime'
},
mode='mouse'
)
p.add_tools(hover)
# Customize the plot
p.yaxis.major_label_orientation = 0
p.legend.title = 'Status'
# Create custom legend items
legend_items = [
LegendItem(label='1 (Pass or known query)', renderers=[circle], index=status_list.index('1')),
LegendItem(label='0 (Pass)', renderers=[circle], index=status_list.index('0')),
LegendItem(label='9 (Fail no response)', renderers=[circle], index=status_list.index('9')),
LegendItem(label='8 (Fail on download)', renderers=[circle], index=status_list.index('8')),
LegendItem(label='2 (Skipped)', renderers=[circle], index=status_list.index('2'))
]
legend = Legend(items=legend_items, location='center')
p.add_layout(legend, 'right')
# Save the plot as HTML and display it
output_file("py_health_check_status.html")
save(p)
show(p)