-
Notifications
You must be signed in to change notification settings - Fork 8
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Enabling runtime setting of cluster number Removing poetry import Reworked help New arg for clusters Adding fastapi dependency Moved inference and model initialization into utils Adding pydantic for server Prompt simplified We no longer need to notify model about special substrings, now that we retrieve representative log samples instead of templates. Exposing n_clusters arg Restoring local retrieval of logs Docstrings and import rearrangement Signed-off-by: Jiri Podivin <jpodivin@redhat.com>
- Loading branch information
Showing
9 changed files
with
369 additions
and
257 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Empty file.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,29 @@ | ||
|
||
# pylint: disable=line-too-long
# Default model used for log analysis: Mistral-7B-Instruct v0.2 (Q4_K_S
# quantization, GGUF format), downloaded directly from Hugging Face.
DEFAULT_ADVISOR = "https://huggingface.co/TheBloke/Mistral-7B-Instruct-v0.2-GGUF/resolve/main/mistral-7b-instruct-v0.2.Q4_K_S.gguf?download=true"

# Main analysis prompt: asks the model to explain a build failure from
# selected log snippets. The single `{}` placeholder receives the snippets.
PROMPT_TEMPLATE = """
Given following log snippets, and nothing else, explain what failure, if any, occured during build of this package.
{}
Analysis of the failure must be in a format of [X] : [Y], where [X] is a log snippet, and [Y] is the explanation.
Finally, drawing on information from all snippets, provide complete explanation of the issue.
Analysis:
"""

# Yes/No triage prompt used to rate individual log chunks for relevance;
# the `{}` placeholder receives one chunk of the log.
SUMMARIZE_PROMPT_TEMPLATE = """
Does following log contain error or issue?
Log:
{}
Answer:
"""

# Local cache directory — presumably for downloaded models; confirm at call sites.
CACHE_LOC = "~/.cache/logdetective/"
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,92 @@ | ||
import os | ||
import logging | ||
|
||
import drain3 | ||
from drain3.template_miner_config import TemplateMinerConfig | ||
from llama_cpp import Llama, LlamaGrammar | ||
Check warning Code scanning / vcs-diff-lint Unable to import 'llama_cpp' Warning
Unable to import 'llama_cpp'
|
||
|
||
from logdetective.constants import SUMMARIZE_PROMPT_TEMPLATE | ||
from logdetective.utils import get_chunks | ||
|
||
LOG = logging.getLogger("logdetective") | ||
|
||
|
||
class LLMExtractor:
    """
    A class that extracts relevant information from logs using a language model.

    The log is rated chunk by chunk with a constrained Yes/No prompt;
    chunks answered "Yes" (optionally together with their immediate
    neighbors) are concatenated into the final extract.
    """
    def __init__(self, model_path: str, verbose: bool, n_lines: int = 2):
        """
        :param model_path: path to a GGUF model file loadable by llama.cpp
        :param verbose: verbosity flag passed through to the Llama backend
        :param n_lines: number of log lines rated together as one chunk
        """
        # n_ctx=0 tells llama.cpp to take the context size from the model.
        self.model = Llama(
            model_path=model_path,
            n_ctx=0,
            verbose=verbose)
        self.n_lines = n_lines
        # Constrain generation to a bare "Yes"/"No" so a chunk's rating can
        # be checked with a simple prefix test in create_extract.
        self.grammar = LlamaGrammar.from_string(
            "root ::= (\"Yes\" | \"No\")", verbose=False)

    def __call__(self, log: str, n_lines: int = 2, neighbors: bool = False) -> str:
        """Rate the log and return the extract of interesting chunks.

        :param log: log file content
        :param n_lines: unused; kept for backward compatibility — the chunk
            size configured in __init__ (self.n_lines) is what is applied
        :param neighbors: include neighbors of positively rated chunks
        """
        chunks = self.rate_chunks(log)
        return self.create_extract(chunks, neighbors)

    def rate_chunks(self, log: str) -> list[tuple]:
        """Scan log by the model and store results.

        :param log: log file content
        :return: list of (chunk, answer) tuples; answer starts with
            "Yes" or "No" and ends with a newline
        """
        results = []
        log_lines = log.split("\n")

        for i in range(0, len(log_lines), self.n_lines):
            block = '\n'.join(log_lines[i:i + self.n_lines])
            # Bug fix: format the prompt with the current chunk (`block`),
            # not the whole `log` — the original rated every chunk against
            # the entire log, defeating chunking and overflowing the
            # context window on large logs.
            prompt = SUMMARIZE_PROMPT_TEMPLATE.format(block)
            out = self.model(prompt, max_tokens=7, grammar=self.grammar)
            out = f"{out['choices'][0]['text']}\n"
            results.append((block, out))

        return results

    def create_extract(self, chunks: list[tuple], neighbors: bool = False) -> str:
        """Extract interesting chunks from the model processing.

        :param chunks: (chunk, answer) tuples produced by rate_chunks
        :param neighbors: when True, also include the chunks immediately
            before and after each positively rated chunk (clamped to the
            valid index range)
        :return: selected chunks joined by newlines
        """
        interesting = []
        summary = ""
        # pylint: disable=consider-using-enumerate
        for i in range(len(chunks)):
            if chunks[i][1].startswith("Yes"):
                interesting.append(i)
                if neighbors:
                    interesting.extend([max(i - 1, 0), min(i + 1, len(chunks) - 1)])

        # De-duplicate indices collected above.
        interesting = set(interesting)

        for i in interesting:
            summary += chunks[i][0] + "\n"

        return summary
|
||
|
||
class DrainExtractor:
    """A class that extracts information from logs using a template miner algorithm.
    """
    def __init__(self, verbose: bool = False, context: bool = False, max_clusters=8):
        """Configure the drain3 template miner from the bundled drain3.ini.

        :param verbose: enable miner profiling and verbose operation
        :param context: stored for callers; not used by the miner itself
        :param max_clusters: upper bound on the number of drain clusters
        """
        miner_config = TemplateMinerConfig()
        miner_config.load(f"{os.path.dirname(__file__)}/drain3.ini")
        miner_config.profiling_enabled = verbose
        miner_config.drain_max_clusters = max_clusters
        self.miner = drain3.TemplateMiner(config=miner_config)
        self.verbose = verbose
        self.context = context

    def __call__(self, log: str) -> str:
        """Mine templates from the log and return one representative chunk
        per cluster, in order of first appearance in the log.

        :param log: log file content
        :return: selected chunks, each terminated by a newline
        """
        # First pass: feed every chunk to the miner so clusters are formed.
        for piece in get_chunks(log):
            LOG.debug(self.miner.add_log_message(piece))
        # Largest clusters first; a cluster is removed from the pool once a
        # chunk has matched it, so each cluster contributes a single
        # representative chunk.
        remaining = sorted(
            self.miner.drain.clusters, key=lambda c: c.size, reverse=True)
        picked = []
        for piece in get_chunks(log):
            matched = self.miner.match(piece, "always")
            if matched in remaining:
                picked.append(f"{piece}\n")
                remaining.remove(matched)
        return "".join(picked)
Oops, something went wrong.