Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -367,4 +367,4 @@ appsettings.Local.json
docker/ollama-cache
docker/vllm-cache
*.xlsx
TODO.md
*.pdf
1 change: 1 addition & 0 deletions src/Cellm/Cellm.csproj
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@
<PackageReference Include="Microsoft.Extensions.Logging.Debug" Version="8.0.0" />
<PackageReference Include="Microsoft.Extensions.Options" Version="8.0.2" />
<PackageReference Include="Microsoft.Extensions.Options.ConfigurationExtensions" Version="8.0.0" />
<PackageReference Include="PdfPig" Version="0.1.9" />
<PackageReference Include="Sentry.Extensions.Logging" Version="4.10.2" />
<PackageReference Include="Sentry.Profiling" Version="4.10.2" />
<Content Include="appsettings.json">
Expand Down
4 changes: 4 additions & 0 deletions src/Cellm/Models/Client.cs
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,10 @@ public async Task<Prompt> Send(Prompt prompt, string? provider, Uri? baseAddress
{
throw new CellmException($"Method not supported: {ex.Message}", ex);
}
catch (FileReaderException ex)
{
throw new CellmException($"File could not be read: {ex.Message}", ex);
}
catch (NullReferenceException ex)
{
throw new CellmException($"Null reference error: {ex.Message}", ex);
Expand Down
7 changes: 5 additions & 2 deletions src/Cellm/Services/ServiceLocator.cs
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
using Cellm.Models.PipelineBehavior;
using Cellm.Services.Configuration;
using Cellm.Tools;
using Cellm.Tools.FileReader;
using Cellm.Tools.Glob;
using ExcelDna.Integration;
using MediatR;
Expand Down Expand Up @@ -98,11 +99,13 @@ private static IServiceCollection ConfigureServices(IServiceCollection services)
.AddMemoryCache()
.AddSingleton<Cache>();


// Tools
services
.AddSingleton<ToolRunner>()
.AddSingleton<ToolFactory>();
.AddSingleton<ToolFactory>()
.AddSingleton<FileReaderFactory>()
.AddSingleton<IFileReader, PdfReader>()
.AddSingleton<IFileReader, TextReader>();

// Model Providers
var rateLimiterConfiguration = configuration.GetRequiredSection(nameof(RateLimiterConfiguration)).Get<RateLimiterConfiguration>()
Expand Down
11 changes: 11 additions & 0 deletions src/Cellm/Tools/FileReader/Exceptions/FileReaderException.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
/// <summary>
/// Exception type used by the file reader tool to report that a file could not be read.
/// </summary>
public class FileReaderException : Exception
{
    /// <summary>Creates the exception with a descriptive message.</summary>
    /// <param name="message">Human-readable description of the failure.</param>
    public FileReaderException(string message)
        : base(message) { }

    /// <summary>Creates the exception with a message and the underlying cause.</summary>
    /// <param name="message">Human-readable description of the failure.</param>
    /// <param name="innerException">The exception that triggered this failure.</param>
    public FileReaderException(string message, Exception innerException)
        : base(message, innerException) { }
}
17 changes: 17 additions & 0 deletions src/Cellm/Tools/FileReader/FileReaderFactory.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
namespace Cellm.Tools.FileReader;

/// <summary>
/// Picks, from the injected set of <see cref="IFileReader"/> implementations,
/// the first one that reports it can read a given file.
/// </summary>
internal class FileReaderFactory
{
    private readonly IEnumerable<IFileReader> _readers;

    /// <summary>Stores the available readers; they are probed in enumeration order.</summary>
    public FileReaderFactory(IEnumerable<IFileReader> readers) => _readers = readers;

    /// <summary>
    /// Returns the first reader whose <c>CanRead</c> accepts <paramref name="filePath"/>.
    /// </summary>
    /// <param name="filePath">Path of the file that needs a reader.</param>
    /// <returns>The first matching reader.</returns>
    /// <exception cref="NotSupportedException">No reader accepts the file.</exception>
    /// <remarks>Exceptions thrown by a reader's <c>CanRead</c> are not caught here and propagate to the caller.</remarks>
    public IFileReader GetReader(string filePath)
    {
        foreach (var candidate in _readers)
        {
            if (candidate.CanRead(filePath))
            {
                return candidate;
            }
        }

        throw new NotSupportedException($"No reader found for file: {filePath}");
    }
}
6 changes: 6 additions & 0 deletions src/Cellm/Tools/FileReader/FileReaderRequest.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
using System.ComponentModel;
using MediatR;

namespace Cellm.Tools.FileReader;

/// <summary>
/// Tool request asking for the content of a single file; answered with a <see cref="FileReaderResponse"/>.
/// </summary>
/// <param name="FilePath">Path of the file to read (described to tool callers as an absolute path).</param>
internal record FileReaderRequest(
    [Description("The absolute path to the file.")] string FilePath)
    : IRequest<FileReaderResponse>;
35 changes: 35 additions & 0 deletions src/Cellm/Tools/FileReader/FileReaderRequestHandler.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
using MediatR;

namespace Cellm.Tools.FileReader;

/// <summary>
/// Handles <see cref="FileReaderRequest"/> by selecting a suitable reader from the
/// <see cref="FileReaderFactory"/> and returning the file's content.
/// </summary>
internal class FileReaderRequestHandler : IRequestHandler<FileReaderRequest, FileReaderResponse>
{
    private readonly FileReaderFactory _fileReaderFactory;

    public FileReaderRequestHandler(FileReaderFactory fileReaderFactory)
    {
        _fileReaderFactory = fileReaderFactory;
    }

    /// <summary>
    /// Reads the file named by the request and wraps its content in a <see cref="FileReaderResponse"/>.
    /// </summary>
    /// <param name="request">Request carrying the path of the file to read.</param>
    /// <param name="cancellationToken">Token forwarded to the selected reader.</param>
    /// <returns>The response holding the file content.</returns>
    /// <exception cref="FileReaderException">
    /// An <see cref="ArgumentException"/>, <see cref="FileNotFoundException"/> or
    /// <see cref="NotSupportedException"/> was thrown while selecting the reader or reading the file.
    /// </exception>
    public async Task<FileReaderResponse> Handle(FileReaderRequest request, CancellationToken cancellationToken)
    {
        try
        {
            var fileReader = _fileReaderFactory.GetReader(request.FilePath);
            var fileContent = await fileReader.ReadContent(request.FilePath, cancellationToken);

            return new FileReaderResponse(fileContent);
        }
        // The three expected failure kinds all map to the same wrapped error; anything else propagates untouched.
        catch (Exception ex) when (ex is ArgumentException or FileNotFoundException or NotSupportedException)
        {
            throw new FileReaderException($"Failed to read file: {request.FilePath}", ex);
        }
    }
}
5 changes: 5 additions & 0 deletions src/Cellm/Tools/FileReader/FileReaderResponse.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
using System.ComponentModel;

namespace Cellm.Tools.FileReader;

/// <summary>
/// Result of a file reader tool call.
/// </summary>
/// <param name="FileContent">The content that was read from the file.</param>
internal record FileReaderResponse(
    [Description("The content of the file")] string FileContent);
6 changes: 6 additions & 0 deletions src/Cellm/Tools/FileReader/IFileReader.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
/// <summary>
/// Contract for a component that can read the content of certain files as text.
/// </summary>
internal interface IFileReader
{
    /// <summary>Reports whether this reader accepts the file at <paramref name="filePath"/>.</summary>
    /// <param name="filePath">Path of the candidate file.</param>
    /// <returns><c>true</c> when this reader accepts the file; otherwise <c>false</c>.</returns>
    bool CanRead(string filePath);

    /// <summary>Reads the file at <paramref name="filePath"/> and returns its content as a string.</summary>
    /// <param name="filePath">Path of the file to read.</param>
    /// <param name="cancellationToken">Token supplied by the caller for cancelling the read.</param>
    /// <returns>A task producing the file content.</returns>
    Task<string> ReadContent(string filePath, CancellationToken cancellationToken);
}
50 changes: 50 additions & 0 deletions src/Cellm/Tools/FileReader/PdfReader.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
using System.Text;
using UglyToad.PdfPig;
using UglyToad.PdfPig.Content;
using UglyToad.PdfPig.DocumentLayoutAnalysis.PageSegmenter;
using UglyToad.PdfPig.DocumentLayoutAnalysis.ReadingOrderDetector;
using UglyToad.PdfPig.DocumentLayoutAnalysis.WordExtractor;

/// <summary>
/// <see cref="IFileReader"/> implementation that extracts the text of PDF documents with PdfPig.
/// </summary>
internal class PdfReader : IFileReader
{
    /// <summary>
    /// Reports whether <paramref name="filePath"/> points at an existing file with a <c>.pdf</c> extension.
    /// </summary>
    /// <param name="filePath">Path of the candidate file.</param>
    /// <returns><c>true</c> when the extension is <c>.pdf</c> (any casing); otherwise <c>false</c>.</returns>
    /// <exception cref="ArgumentException">The path is null, empty or whitespace.</exception>
    /// <exception cref="FileNotFoundException">No file exists at the path.</exception>
    public bool CanRead(string filePath)
    {
        if (string.IsNullOrWhiteSpace(filePath))
        {
            throw new ArgumentException("File path cannot be null or empty.", nameof(filePath));
        }

        if (!File.Exists(filePath))
        {
            throw new FileNotFoundException($"File not found: {filePath}");
        }

        // Ordinal, case-insensitive comparison: file extensions are identifiers, so the
        // match must not depend on the current thread culture (as ToLower() does).
        return string.Equals(Path.GetExtension(filePath), ".pdf", StringComparison.OrdinalIgnoreCase);
    }

    /// <summary>
    /// Extracts the text of every page of the PDF at <paramref name="filePath"/>.
    /// </summary>
    /// <param name="filePath">Path of the PDF document.</param>
    /// <param name="cancellationToken">Checked before opening the document and before each page.</param>
    /// <returns>
    /// A completed task holding the text of all blocks, one block per line, with each block
    /// normalized to Unicode form KC.
    /// </returns>
    /// <exception cref="OperationCanceledException">Cancellation was requested.</exception>
    public Task<string> ReadContent(string filePath, CancellationToken cancellationToken)
    {
        cancellationToken.ThrowIfCancellationRequested();

        // Both helpers are shared singletons, so fetch them once rather than once per page.
        var pageSegmenter = DocstrumBoundingBoxes.Instance;
        var readingOrder = UnsupervisedReadingOrderDetector.Instance;

        var stringBuilder = new StringBuilder();

        using (PdfDocument document = PdfDocument.Open(filePath))
        {
            foreach (Page page in document.GetPages())
            {
                // Extraction is synchronous, so a page boundary is the only point where
                // a cancellation request can be observed.
                cancellationToken.ThrowIfCancellationRequested();

                // Group the page's words into blocks, then put the blocks in reading order.
                var textBlocks = pageSegmenter.GetBlocks(page.GetWords());
                var orderedTextBlocks = readingOrder.Get(textBlocks);

                foreach (var orderedTextBlock in orderedTextBlocks)
                {
                    var text = orderedTextBlock.Text.Normalize(NormalizationForm.FormKC);
                    stringBuilder.AppendLine(text);
                }
            }
        }

        return Task.FromResult(stringBuilder.ToString());
    }
}
35 changes: 35 additions & 0 deletions src/Cellm/Tools/FileReader/TextReader.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
using System.Text;

/// <summary>
/// <see cref="IFileReader"/> implementation for plain-text files, selected by file extension.
/// </summary>
internal class TextReader : IFileReader
{
    // Shared by all instances; the case-insensitive comparer removes the need to
    // lower-case the extension before each lookup.
    private static readonly HashSet<string> SupportedExtensions = new HashSet<string>(StringComparer.OrdinalIgnoreCase)
    {
        ".c", ".cpp", ".cs", ".csv", ".cxx", ".h", ".hxx", ".html", ".java",
        ".json", ".jsonl", ".md", ".php", ".py", ".rb", ".txt", ".xml"
    };

    /// <summary>
    /// Reports whether <paramref name="filePath"/> points at an existing file whose extension
    /// is one of the supported text extensions.
    /// </summary>
    /// <param name="filePath">Path of the candidate file.</param>
    /// <returns><c>true</c> when the extension is supported (any casing); otherwise <c>false</c>.</returns>
    /// <exception cref="ArgumentException">The path is null, empty or whitespace.</exception>
    /// <exception cref="FileNotFoundException">No file exists at the path.</exception>
    public bool CanRead(string filePath)
    {
        if (string.IsNullOrWhiteSpace(filePath))
        {
            throw new ArgumentException("File path cannot be null or empty.", nameof(filePath));
        }

        if (!File.Exists(filePath))
        {
            throw new FileNotFoundException($"File not found: {filePath}");
        }

        return SupportedExtensions.Contains(Path.GetExtension(filePath));
    }

    /// <summary>
    /// Reads the whole file as text, decoding as UTF-8 unless a byte order mark indicates another encoding.
    /// </summary>
    /// <param name="filePath">Path of the file to read.</param>
    /// <param name="cancellationToken">Token that cancels the read.</param>
    /// <returns>The full text of the file.</returns>
    /// <exception cref="OperationCanceledException">Cancellation was requested.</exception>
    public async Task<string> ReadContent(string filePath, CancellationToken cancellationToken)
    {
        // File.ReadAllTextAsync opens the file for shared reading and detects byte order marks,
        // like the StreamReader it replaces, and additionally observes the cancellation token.
        return await File.ReadAllTextAsync(filePath, Encoding.UTF8, cancellationToken);
    }
}
7 changes: 6 additions & 1 deletion src/Cellm/Tools/ToolRunner.cs
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
using Cellm.Models;
using Cellm.Prompts;
using Cellm.Tools.FileReader;
using Cellm.Tools.Glob;
using MediatR;

Expand All @@ -17,7 +18,10 @@ public ToolRunner(ISender sender, Serde serde, ToolFactory toolFactory)
_sender = sender;
_serde = serde;
_toolFactory = toolFactory;
_toolTypes = new List<Type>() { typeof(GlobRequest) };
_toolTypes = new List<Type>() {
typeof(GlobRequest),
typeof(FileReaderRequest)
};
}

public List<Tool> GetTools()
Expand All @@ -30,6 +34,7 @@ public async Task<string> Run(ToolCall toolCall)
return toolCall.Name switch
{
nameof(GlobRequest) => await Run<GlobRequest>(toolCall.Arguments),
nameof(FileReaderRequest) => await Run<FileReaderRequest>(toolCall.Arguments),
_ => throw new ArgumentException($"Unsupported tool: {toolCall.Name}")
};
}
Expand Down
6 changes: 6 additions & 0 deletions src/Cellm/packages.lock.json
Original file line number Diff line number Diff line change
Expand Up @@ -171,6 +171,12 @@
"Microsoft.Extensions.Primitives": "8.0.0"
}
},
"PdfPig": {
"type": "Direct",
"requested": "[0.1.9, )",
"resolved": "0.1.9",
"contentHash": "VU27oq5O0rpD+zWiD639xXkSHtsNXqbPpYV0pzYg9VPhDibH1cq1i67f9DuW2WnzDVEti0HipLcZxDhdayIzGg=="
},
"Sentry.Extensions.Logging": {
"type": "Direct",
"requested": "[4.10.2, )",
Expand Down