diff --git a/README.md b/README.md
index a2008cb..8fd76d8 100644
--- a/README.md
+++ b/README.md
@@ -31,7 +31,8 @@ To analyze a log file, run the script with the following command line arguments:
 - `url` (required): The URL of the log file to be analyzed.
 - `--model` (optional, default: "Mistral-7B-Instruct-v0.2-GGUF"): The path or URL of the language model for analysis.
 - `--summarizer` (optional, default: "drain"): Choose between LLM and Drain template miner as the log summarizer. You can also provide the path to an existing language model file instead of using a URL.
-- `--n_lines` (optional, default: 5): The number of lines per chunk for LLM analysis. This only makes sense when you are summarizing with LLM.
+- `--n_lines` (optional, default: 8): The number of lines per chunk for LLM analysis. This only makes sense when you are summarizing with LLM.
+- `--n_clusters` (optional, default: 8): The number of clusters for Drain to organize log chunks into. This only makes sense when you are summarizing with Drain.
 
 Example usage:
 
diff --git a/logdetective/__init__.py b/logdetective/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/logdetective/constants.py b/logdetective/constants.py
new file mode 100644
index 0000000..e26f252
--- /dev/null
+++ b/logdetective/constants.py
@@ -0,0 +1,29 @@
+
+# pylint: disable=line-too-long
+DEFAULT_ADVISOR = "https://huggingface.co/TheBloke/Mistral-7B-Instruct-v0.2-GGUF/resolve/main/mistral-7b-instruct-v0.2.Q4_K_S.gguf?download=true"
+
+PROMPT_TEMPLATE = """
+Given the following log snippets, and nothing else, explain what failure, if any, occurred during the build of this package.
+
+{}
+
+Analysis of the failure must be in the format of [X] : [Y], where [X] is a log snippet and [Y] is the explanation.
+
+Finally, drawing on information from all snippets, provide a complete explanation of the issue.
+
+Analysis:
+
+"""
+
+SUMMARIZE_PROMPT_TEMPLATE = """
+Does the following log contain an error or issue?
+
+Log:
+
+{}
+
+Answer:
+
+"""
+
+CACHE_LOC = "~/.cache/logdetective/"
diff --git a/logdetective/extractors.py b/logdetective/extractors.py
new file mode 100644
index 0000000..4e9b4f5
--- /dev/null
+++ b/logdetective/extractors.py
@@ -0,0 +1,92 @@
+import os
+import logging
+
+import drain3
+from drain3.template_miner_config import TemplateMinerConfig
+from llama_cpp import Llama, LlamaGrammar
+
+from logdetective.constants import SUMMARIZE_PROMPT_TEMPLATE
+from logdetective.utils import get_chunks
+
+LOG = logging.getLogger("logdetective")
+
+
+class LLMExtractor:
+    """
+    A class that extracts relevant information from logs using a language model.
+    """
+    def __init__(self, model_path: str, verbose: bool, n_lines: int = 2):
+        self.model = Llama(
+            model_path=model_path,
+            n_ctx=0,
+            verbose=verbose)
+        self.n_lines = n_lines
+        self.grammar = LlamaGrammar.from_string(
+            "root ::= (\"Yes\" | \"No\")", verbose=False)
+
+    def __call__(self, log: str, n_lines: int = 2, neighbors: bool = False) -> str:
+        chunks = self.rate_chunks(log)
+        out = self.create_extract(chunks, neighbors)
+        return out
+
+    def rate_chunks(self, log: str) -> list[tuple]:
+        """Scan log by the model and store results.
+
+        :param log: log file content
+        """
+        results = []
+        log_lines = log.split("\n")
+
+        for i in range(0, len(log_lines), self.n_lines):
+            block = '\n'.join(log_lines[i:i + self.n_lines])
+            prompt = SUMMARIZE_PROMPT_TEMPLATE.format(block)
+            out = self.model(prompt, max_tokens=7, grammar=self.grammar)
+            out = f"{out['choices'][0]['text']}\n"
+            results.append((block, out))
+
+        return results
+
+    def create_extract(self, chunks: list[tuple], neighbors: bool = False) -> str:
+        """Extract interesting chunks from the model processing.
+        """
+        interesting = []
+        summary = ""
+        # pylint: disable=consider-using-enumerate
+        for i in range(len(chunks)):
+            if chunks[i][1].startswith("Yes"):
+                interesting.append(i)
+                if neighbors:
+                    interesting.extend([max(i - 1, 0), min(i + 1, len(chunks) - 1)])
+
+        interesting = sorted(set(interesting))
+
+        for i in interesting:
+            summary += chunks[i][0] + "\n"
+
+        return summary
+
+
+class DrainExtractor:
+    """A class that extracts information from logs using a template miner algorithm.
+    """
+    def __init__(self, verbose: bool = False, context: bool = False, max_clusters=8):
+        config = TemplateMinerConfig()
+        config.load(f"{os.path.dirname(__file__)}/drain3.ini")
+        config.profiling_enabled = verbose
+        config.drain_max_clusters = max_clusters
+        self.miner = drain3.TemplateMiner(config=config)
+        self.verbose = verbose
+        self.context = context
+
+    def __call__(self, log: str) -> str:
+        out = ""
+        for chunk in get_chunks(log):
+            processed_line = self.miner.add_log_message(chunk)
+            LOG.debug(processed_line)
+        sorted_clusters = sorted(self.miner.drain.clusters, key=lambda it: it.size, reverse=True)
+        for chunk in get_chunks(log):
+            cluster = self.miner.match(chunk, "always")
+            if cluster in sorted_clusters:
+                out += f"{chunk}\n"
+                sorted_clusters.remove(cluster)
+        return out
diff --git a/logdetective/logdetective.py b/logdetective/logdetective.py
index a38c524..6fa699b 100644
--- a/logdetective/logdetective.py
+++ b/logdetective/logdetective.py
@@ -2,248 +2,31 @@
 import logging
 import os
 import sys
-from urllib.request import urlretrieve
-from urllib.parse import urlparse
-import drain3
-import numpy as np
-import progressbar
-import requests
-from drain3.template_miner_config import TemplateMinerConfig
-from llama_cpp import Llama, LlamaGrammar
-
-# pylint: disable=line-too-long
-DEFAULT_ADVISOR = "https://huggingface.co/TheBloke/Mistral-7B-Instruct-v0.2-GGUF/resolve/main/mistral-7b-instruct-v0.2.Q4_K_S.gguf?download=true"
-
-# pylint: disable=line-too-long
-DEFAULT_LLM_RATER = "https://huggingface.co/TheBloke/TinyLlama-1.1B-Chat-v1.0-GGUF/resolve/main/tinyllama-1.1b-chat-v1.0.Q4_K_S.gguf?download=true"
-
-PROMPT_TEMPLATE = """
-Given following log snippets, and nothing else, explain what failure, if any occured during build of this package.
-Ignore strings wrapped in <: :>, such as <:*:>.
-
-{}
-
-Analysis of the failure must be in a format of [X] : [Y], where [X] is a log snippet, and [Y] is the explanation.
-
-Finally, drawing on information from all snippets, provide complete explanation of the issue.
-
-Analysis:
-
-"""
-
-SUMMARIZE_PROMPT_TEMPLATE = """
-Does following log contain error or issue?
- -Log: - -{} - -Answer: - -""" - -CACHE_LOC = "~/.cache/logdetective/" +from logdetective.constants import DEFAULT_ADVISOR, CACHE_LOC +from logdetective.utils import download_model, process_log, initialize_model, retrieve_log_content +from logdetective.extractors import LLMExtractor, DrainExtractor LOG = logging.getLogger("logdetective") -class MyProgressBar(): - """Show progress when downloading model.""" - def __init__(self): - self.pbar = None - - def __call__(self, block_num, block_size, total_size): - if not self.pbar: - self.pbar = progressbar.ProgressBar(maxval=total_size) - self.pbar.start() - - downloaded = block_num * block_size - if downloaded < total_size: - self.pbar.update(downloaded) - else: - self.pbar.finish() - - -def chunk_continues(text: str, index: int) -> bool: - """Set of heuristics for determining whether or not - does the current chunk of log text continue on next line. - """ - conditionals = [ - lambda i, string: string[i + 1].isspace(), - lambda i, string: string[i - 1] == "\\" - ] - - for c in conditionals: - y = c(index, text) - if y: - return True - - return False - - -def get_chunks(text: str): - """Split log into chunks according to heuristic - based on whitespace and backslash presence. - """ - text_len = len(text) - i = 0 - chunk = "" - while i < text_len: - chunk += text[i] - if text[i] == '\n': - if i + 1 < text_len and chunk_continues(text, i): - i += 1 - continue - yield chunk - chunk = "" - i += 1 - - -class LLMExtractor: - """ - A class that extracts relevant information from logs using a language model. - """ - def __init__(self, model_path: str, verbose: bool): - self.model = Llama( - model_path=model_path, - n_ctx=0, - verbose=verbose) - self.grammar = LlamaGrammar.from_string( - "root ::= (\"Yes\" | \"No\")", verbose=False) - - def __call__(self, log: str, n_lines: int = 2, neighbors: bool = False) -> str: - chunks = self.rate_chunks(log, n_lines) - out = self.create_extract(chunks, neighbors) - return out - - def rate_chunks(self, log: str, n_lines: int = 2) -> list[tuple]: - """Scan log by the model and store results. - - :param log: log file content - :param n_lines: How many lines should the model take into consideration - """ - results = [] - log_lines = log.split("\n") - - for i in range(0, len(log_lines), n_lines): - block = '\n'.join(log_lines[i:i + n_lines]) - prompt = SUMMARIZE_PROMPT_TEMPLATE.format(log) - out = self.model(prompt, max_tokens=7, grammar=self.grammar) - out = f"{out['choices'][0]['text']}\n" - results.append((block, out)) - - return results - - def create_extract(self, chunks: list[tuple], neighbors: bool = False) -> str: - """Extract interesting chunks from the model processing. - """ - interesting = [] - summary = "" - # pylint: disable=consider-using-enumerate - for i in range(len(chunks)): - if chunks[i][1].startswith("Yes"): - interesting.append(i) - if neighbors: - interesting.extend([max(i - 1, 0), min(i + 1, len(chunks) - 1)]) - - interesting = np.unique(interesting) - - for i in interesting: - summary += chunks[i][0] + "\n" - - return summary - - -class DrainExtractor: - """A class that extracts information from logs using a template miner algorithm. 
- """ - def __init__(self, verbose: bool = False, context: bool = False): - config = TemplateMinerConfig() - config.load(f"{os.path.dirname(__file__)}/drain3.ini") - config.profiling_enabled = verbose - self.miner = drain3.TemplateMiner(config=config) - self.verbose = verbose - self.context = context - - def __call__(self, log: str) -> str: - out = "" - for chunk in get_chunks(log): - processed_line = self.miner.add_log_message(chunk) - LOG.debug(processed_line) - sorted_clusters = sorted(self.miner.drain.clusters, key=lambda it: it.size, reverse=True) - for chunk in get_chunks(log): - cluster = self.miner.match(chunk, "always") - if cluster in sorted_clusters: - out += f"{chunk}\n" - sorted_clusters.remove(cluster) - return out - - -def download_model(url: str, verbose: bool = False) -> str: - """ Downloads a language model from a given URL and saves it to the cache directory. - - Args: - url (str): The URL of the language model to be downloaded. - - Returns: - str: The local file path of the downloaded language model. - """ - path = os.path.join( - os.path.expanduser(CACHE_LOC), url.split('/')[-1]) - - LOG.info("Downloading model from %s to %s", url, path) - if not os.path.exists(path): - if verbose: - path, _status = urlretrieve(url, path, MyProgressBar()) - else: - path, _status = urlretrieve(url, path) - - return path - - -def process_log(log: str, model: Llama) -> str: - """ - Processes a given log using the provided language model and returns its summary. - - Args: - log (str): The input log to be processed. - model (Llama): The language model used for processing the log. - - Returns: - str: The summary of the given log generated by the language model. - """ - return model(PROMPT_TEMPLATE.format(log), max_tokens=0)["choices"][0]["text"] - - -def retrieve_log_content(log_path): - """Get content of the file on the log_path path.""" - parsed_url = urlparse(log_path) - log = "" - - if not parsed_url.scheme: - if not os.path.exists(log_path): - raise ValueError(f"Local log {log_path} doesn't exist!") - - with open(log_path, "rt") as f: - log = f.read() - - else: - log = requests.get(log_path, timeout=60).text - - return log - - def main(): """Main execution function.""" parser = argparse.ArgumentParser("logdetective") - parser.add_argument("url", type=str, default="") - parser.add_argument("-M", "--model", type=str, default=DEFAULT_ADVISOR) - parser.add_argument("-S", "--summarizer", type=str, default="drain") - parser.add_argument("-N", "--n_lines", type=int, default=5) + parser.add_argument("file", type=str, default="", help="The URL or path to the log file to be analyzed.") + parser.add_argument("-M", "--model", help="The path or URL of the language model for analysis.", + type=str, default=DEFAULT_ADVISOR) + parser.add_argument("-S", "--summarizer", type=str, default="drain", + help="Choose between LLM and Drain template miner as the log summarizer.\ + LLM must be specified as path to a model, URL or local file.") + parser.add_argument("-N", "--n_lines", type=int, + default=8, help="The number of lines per chunk for LLM analysis.\ + This only makes sense when you are summarizing with LLM.") + parser.add_argument("-C", "--n_clusters", type=int, default=8, + help="Number of clusters for Drain to organize log chunks into.\ + This only makes sense when you are summarizing with Drain") parser.add_argument("-v", "--verbose", action='count', default=0) parser.add_argument("-q", "--quiet", action='store_true') - args = parser.parse_args() if args.verbose and args.quiet: @@ -266,20 +49,17 @@ 
def main():
     model_pth = args.model
 
     if args.summarizer == "drain":
-        extractor = DrainExtractor(args.verbose > 1, context=True)
+        extractor = DrainExtractor(args.verbose > 1, context=True, max_clusters=args.n_clusters)
     elif os.path.isfile(args.summarizer):
-        extractor = LLMExtractor(args.summarizer, args.verbose > 1)
+        extractor = LLMExtractor(args.summarizer, args.verbose > 1, args.n_lines)
     else:
         summarizer_pth = download_model(args.summarizer, not args.quiet)
         extractor = LLMExtractor(summarizer_pth, args.verbose > 1)
 
     LOG.info("Getting summary")
-    model = Llama(
-        model_path=model_pth,
-        n_ctx=0,
-        verbose=args.verbose > 2)
+    model = initialize_model(model_pth, args.verbose > 2)
 
-    log = retrieve_log_content(args.url)
+    log = retrieve_log_content(args.file)
     log_summary = extractor(log)
 
     ratio = len(log_summary.split('\n')) / len(log.split('\n'))
diff --git a/logdetective/server.py b/logdetective/server.py
new file mode 100644
index 0000000..f4c998a
--- /dev/null
+++ b/logdetective/server.py
@@ -0,0 +1,54 @@
+import logging
+import os
+import json
+
+from fastapi import FastAPI
+from pydantic import BaseModel
+
+import requests
+
+from logdetective.constants import PROMPT_TEMPLATE
+from logdetective.extractors import DrainExtractor
+
+
+class BuildLog(BaseModel):
+    """Model of data submitted to API.
+    """
+    url: str
+
+LOG = logging.getLogger("logdetective")
+
+app = FastAPI()
+
+LLM_CPP_SERVER_ADDRESS = os.environ.get("LLAMA_CPP_SERVER", "http://localhost")
+LLM_CPP_SERVER_PORT = os.environ.get("LLAMA_CPP_SERVER_PORT", 8000)
+LLM_CPP_SERVER_TIMEOUT = os.environ.get("LLAMA_CPP_SERVER_TIMEOUT", 200)
+
+@app.post("/analyze")
+async def analyze_log(build_log: BuildLog):
+    """Provide an endpoint for log file submission and analysis.
+    """
+    extractor = DrainExtractor(verbose=True, context=True, max_clusters=8)
+
+    LOG.info("Getting summary")
+
+    log = requests.get(build_log.url, timeout=60).text
+    log_summary = extractor(log)
+
+    ratio = len(log_summary.split('\n')) / len(log.split('\n'))
+    LOG.debug("Log summary: \n %s", log_summary)
+    LOG.info("Compression ratio: %s", ratio)
+
+    LOG.info("Analyzing the text")
+    data = {
+        "prompt": PROMPT_TEMPLATE.format(log_summary),
+        "max_tokens": "0"}
+
+    # Expects llama-cpp server to run on LLM_CPP_SERVER_ADDRESS:LLM_CPP_SERVER_PORT
+    response = requests.post(
+        f"{LLM_CPP_SERVER_ADDRESS}:{LLM_CPP_SERVER_PORT}/v1/completions",
+        headers={"Content-Type":"application/json"},
+        data=json.dumps(data),
+        timeout=int(LLM_CPP_SERVER_TIMEOUT))
+
+    return response.text
diff --git a/logdetective/utils.py b/logdetective/utils.py
new file mode 100644
index 0000000..08ea188
--- /dev/null
+++ b/logdetective/utils.py
@@ -0,0 +1,132 @@
+import logging
+import os
+from urllib.parse import urlparse
+from urllib.request import urlretrieve
+
+import requests
+import progressbar
+
+from llama_cpp import Llama
+from logdetective.constants import CACHE_LOC, PROMPT_TEMPLATE
+
+
+LOG = logging.getLogger("logdetective")
+
+
+class MyProgressBar():
+    """Show progress when downloading model."""
+    def __init__(self):
+        self.pbar = None
+
+    def __call__(self, block_num, block_size, total_size):
+        if not self.pbar:
+            self.pbar = progressbar.ProgressBar(maxval=total_size)
+            self.pbar.start()
+
+        downloaded = block_num * block_size
+        if downloaded < total_size:
+            self.pbar.update(downloaded)
+        else:
+            self.pbar.finish()
+
+
+def chunk_continues(text: str, index: int) -> bool:
+    """Set of heuristics for determining whether the current chunk
+    of log text continues on the next line.
+ """ + conditionals = [ + lambda i, string: string[i + 1].isspace(), + lambda i, string: string[i - 1] == "\\" + ] + + for c in conditionals: + y = c(index, text) + if y: + return True + + return False + + +def get_chunks(text: str): + """Split log into chunks according to heuristic + based on whitespace and backslash presence. + """ + text_len = len(text) + i = 0 + chunk = "" + while i < text_len: + chunk += text[i] + if text[i] == '\n': + if i + 1 < text_len and chunk_continues(text, i): + i += 1 + continue + yield chunk + chunk = "" + i += 1 + + +def download_model(url: str, verbose: bool = False) -> str: + """ Downloads a language model from a given URL and saves it to the cache directory. + + Args: + url (str): The URL of the language model to be downloaded. + + Returns: + str: The local file path of the downloaded language model. + """ + path = os.path.join( + os.path.expanduser(CACHE_LOC), url.split('/')[-1]) + + LOG.info("Downloading model from %s to %s", url, path) + if not os.path.exists(path): + if verbose: + path, _status = urlretrieve(url, path, MyProgressBar()) + else: + path, _status = urlretrieve(url, path) + + return path + + +def initialize_model(model_pth: str, verbose: bool) -> Llama: + """Initialize Llama class for inference. + Args: + model_pth (str): path to gguf model file + verbose (bool): level of verbosity for llamacpp + """ + model = Llama( + model_path=model_pth, + n_ctx=0, # Maximum context for the model + verbose=verbose) + + return model + + +def process_log(log: str, model: Llama) -> str: + """ + Processes a given log using the provided language model and returns its summary. + + Args: + log (str): The input log to be processed. + model (Llama): The language model used for processing the log. + + Returns: + str: The summary of the given log generated by the language model. + """ + return model(PROMPT_TEMPLATE.format(log), max_tokens=0)["choices"][0]["text"] + +def retrieve_log_content(log_path: str) -> str: + """Get content of the file on the log_path path.""" + parsed_url = urlparse(log_path) + log = "" + + if not parsed_url.scheme: + if not os.path.exists(log_path): + raise ValueError(f"Local log {log_path} doesn't exist!") + + with open(log_path, "rt") as f: + log = f.read() + + else: + log = requests.get(log_path, timeout=60).text + + return log diff --git a/poetry.lock b/poetry.lock index 42d494b..bdea15c 100644 --- a/poetry.lock +++ b/poetry.lock @@ -1,4 +1,4 @@ -# This file is automatically @generated by Poetry 1.7.1 and should not be changed by hand. +# This file is automatically @generated by Poetry 1.8.2 and should not be changed by hand. 
[[package]] name = "cachetools" @@ -570,7 +570,10 @@ h2 = ["h2 (>=4,<5)"] socks = ["pysocks (>=1.5.6,!=1.5.7,<2.0)"] zstd = ["zstandard (>=0.18.0)"] +[extras] +server = [] + [metadata] lock-version = "2.0" python-versions = "^3.11" -content-hash = "8a12348a4f765827e190d5e9293c9025ded491bd2d31146045040f83359561c6" +content-hash = "c7ef77fd33ebc10a6e6727c1e33f8dedd50b35e1f12c02b9e2abd48d263d9d1f" diff --git a/pyproject.toml b/pyproject.toml index 2964e67..6e87cb1 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -6,6 +6,9 @@ authors = ["Jiri Podivin "] license = "Apache-2.0" readme = "README.md" include = ["logdetective/drain3.ini"] +packages = [ + { include = "logdetective" } +] classifiers = [ "Development Status :: 4 - Beta", "Environment :: Console", @@ -37,6 +40,9 @@ build-backend = "poetry.core.masonry.api" [tool.poetry.scripts] logdetective = 'logdetective.logdetective:main' +[tool.poetry.extras] +server = ["fastapi", "pydantic"] + [tool.pylint] disable = [ "inconsistent-return-statements",
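
A minimal usage sketch for the new POST /analyze endpoint added in logdetective/server.py. It assumes the FastAPI app is being served (for example with `uvicorn logdetective.server:app --port 8080`) and that a llama-cpp server is reachable via LLAMA_CPP_SERVER / LLAMA_CPP_SERVER_PORT; the host, port, and build-log URL below are illustrative assumptions, not values fixed by this change.

    # Hypothetical client sketch; host, port, and the log URL are placeholders.
    import requests

    LOGDETECTIVE_URL = "http://localhost:8080/analyze"  # assumed uvicorn host/port
    payload = {"url": "https://example.org/logs/build.log"}  # BuildLog model: a single `url` field

    # The endpoint fetches the log, summarizes it with DrainExtractor, forwards the
    # prompt to the llama-cpp server, and returns the raw completion response text.
    response = requests.post(LOGDETECTIVE_URL, json=payload, timeout=600)
    response.raise_for_status()
    print(response.text)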