Skip to content

Commit

Permalink
fix: several pdf errors
Browse files Browse the repository at this point in the history
  • Loading branch information
0xMochan committed Oct 23, 2024
1 parent 5b00932 commit 3429b4a
Show file tree
Hide file tree
Showing 6 changed files with 513 additions and 75 deletions.
14 changes: 14 additions & 0 deletions rig-core/examples/loaders.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
use rig::loaders::FileLoader;

/// Example: load the crate manifest through `FileLoader` and print its contents.
///
/// Every matched file is read; successfully read contents go to stdout and read
/// failures are reported on stderr without aborting the run.
#[tokio::main]
async fn main() -> Result<(), anyhow::Error> {
    // Cargo's manifest is spelled `Cargo.toml`. Glob matching is case-sensitive by
    // default, so a lowercase pattern matches nothing on case-sensitive filesystems.
    FileLoader::with_glob("Cargo.toml")?
        .read()
        .into_iter()
        .for_each(|result| match result {
            Ok(content) => println!("{}", content),
            Err(e) => eprintln!("Error reading file: {}", e),
        });

    Ok(())
}
194 changes: 170 additions & 24 deletions rig-core/src/loaders/file.rs
Original file line number Diff line number Diff line change
Expand Up @@ -18,28 +18,12 @@ pub enum FileLoaderError {
GlobError(#[from] glob::GlobError),
}

/// Wrapper around a boxed iterator that yields items of type `State`.
pub struct FileLoader<'a, State> {
// Type-erased item source; the `'a` bound limits how long the boxed iterator may borrow.
iterator: Box<dyn Iterator<Item = State> + 'a>,
}

// Crate-private abstraction over values that can be consumed and turned into text.
// The `FileLoader` `read` / `read_with_path` methods call these on each iterator item.
pub(crate) trait Readable {
/// Consumes `self` and returns a `String`, or a `FileLoaderError` on failure.
fn read(self) -> Result<String, FileLoaderError>;
/// Like `read`, but the success value is a `(PathBuf, String)` pair.
fn read_with_path(self) -> Result<(PathBuf, String), FileLoaderError>;
}

impl<'a> FileLoader<'a, Result<PathBuf, FileLoaderError>> {
    /// Turns a loader of path results into a loader of content results by applying
    /// `Readable::read` to every item. The mapping is lazy and runs as the loader is
    /// iterated.
    pub fn read(self) -> FileLoader<'a, Result<String, FileLoaderError>> {
        let contents = self.iterator.map(|entry| entry.read());
        FileLoader {
            iterator: Box::new(contents),
        }
    }

    /// Same as `read`, but each successful item is a `(PathBuf, String)` pair produced by
    /// `Readable::read_with_path`.
    pub fn read_with_path(self) -> FileLoader<'a, Result<(PathBuf, String), FileLoaderError>> {
        let contents = self.iterator.map(|entry| entry.read_with_path());
        FileLoader {
            iterator: Box::new(contents),
        }
    }
}

impl<'a> FileLoader<'a, PathBuf> {
pub fn read(self) -> FileLoader<'a, Result<String, FileLoaderError>> {
FileLoader {
Expand Down Expand Up @@ -71,16 +55,119 @@ impl<T: Readable> Readable for Result<T, FileLoaderError> {
}
}

// ## FileLoader definitions and implementations ##

/// `FileLoader` is a utility for loading files from the filesystem using glob patterns or directory paths.
/// It provides methods to read file contents and handle errors gracefully.
///
/// # Errors
///
/// This module defines a custom error type `FileLoaderError` which can represent various errors that might occur
/// during file loading operations, such as invalid glob patterns, IO errors, and glob errors.
///
/// # Example Usage
///
/// ```rust
/// use rig::loaders::FileLoader;
///
/// fn main() -> Result<(), Box<dyn std::error::Error>> {
///     // Create a FileLoader using a glob pattern
///     let loader = FileLoader::with_glob("path/to/files/*.txt")?;
///
///     // Read file contents, ignoring any errors
///     let contents: Vec<String> = loader
///         .read()
///         .ignore_errors()
///         .into_iter()
///         .collect();
///
///     for content in contents {
///         println!("{}", content);
///     }
///
///     Ok(())
/// }
/// ```
///
/// `FileLoader` uses strict typing between the iterator methods to ensure that transitions between
/// different implementations of the loaders and its methods are handled properly by the compiler.
pub struct FileLoader<'a, T> {
// Type-erased source of items; each adapter method re-boxes a transformed iterator.
iterator: Box<dyn Iterator<Item = T> + 'a>,
}

impl<'a> FileLoader<'a, Result<PathBuf, FileLoaderError>> {
    /// Reads the contents of the files within the iterator returned by `with_glob` or `with_dir`.
    ///
    /// Every item is passed through `Readable::read`, so the resulting loader yields one
    /// `Result<String, FileLoaderError>` per input item. The mapping is lazy: nothing is
    /// read until the returned loader is iterated.
    ///
    /// # Example
    /// Read files matching "files/*.txt" and print the content of each file.
    ///
    /// ```rust,no_run
    /// use rig::loaders::FileLoader;
    ///
    /// let content = FileLoader::with_glob("files/*.txt")?.read().into_iter();
    /// for result in content {
    ///     match result {
    ///         Ok(content) => println!("{}", content),
    ///         Err(e) => eprintln!("Error reading file: {}", e),
    ///     }
    /// }
    /// # Ok::<(), Box<dyn std::error::Error>>(())
    /// ```
    pub fn read(self) -> FileLoader<'a, Result<String, FileLoaderError>> {
        FileLoader {
            iterator: Box::new(self.iterator.map(|res| res.read())),
        }
    }

    /// Reads the contents of the files within the iterator returned by `with_glob` or `with_dir`
    /// and returns the path along with the content.
    ///
    /// Every item is passed through `Readable::read_with_path`, so each successful item is a
    /// `(PathBuf, String)` pair. As with `read`, the mapping is lazy.
    ///
    /// # Example
    /// Read files matching "files/*.txt" and print the corresponding path and content for
    /// each file.
    ///
    /// ```rust,no_run
    /// use rig::loaders::FileLoader;
    ///
    /// let content = FileLoader::with_glob("files/*.txt")?.read_with_path().into_iter();
    /// for result in content {
    ///     match result {
    ///         Ok((path, content)) => println!("{:?} {}", path, content),
    ///         Err(e) => eprintln!("Error reading file: {}", e),
    ///     }
    /// }
    /// # Ok::<(), Box<dyn std::error::Error>>(())
    /// ```
    pub fn read_with_path(self) -> FileLoader<'a, Result<(PathBuf, String), FileLoaderError>> {
        FileLoader {
            iterator: Box::new(self.iterator.map(|res| res.read_with_path())),
        }
    }
}

impl<'a, T: 'a> FileLoader<'a, Result<T, FileLoaderError>> {
    /// Ignores errors in the iterator, returning only successful results. This can be used on any
    /// `FileLoader` whose items are `Result<_, FileLoaderError>`.
    ///
    /// `Err` items are silently dropped; the payload of every `Ok` item is yielded in the
    /// original order. The filtering is lazy and happens as the returned loader is iterated.
    ///
    /// # Example
    /// Read files matching "files/*.txt" and ignore errors from unreadable files.
    ///
    /// ```rust,no_run
    /// use rig::loaders::FileLoader;
    ///
    /// let content = FileLoader::with_glob("files/*.txt")?.read().ignore_errors().into_iter();
    /// for result in content {
    ///     println!("{}", result)
    /// }
    /// # Ok::<(), Box<dyn std::error::Error>>(())
    /// ```
    pub fn ignore_errors(self) -> FileLoader<'a, T> {
        FileLoader {
            iterator: Box::new(self.iterator.filter_map(Result::ok)),
        }
    }
}

impl<'a> FileLoader<'a, PathBuf> {
pub fn new(
impl<'a> FileLoader<'a, Result<PathBuf, FileLoaderError>> {
/// Creates a new `FileLoader` using a glob pattern to match files.
///
/// # Example
/// Create a `FileLoader` for all `.txt` files that match the glob "files/*.txt".
///
/// ```rust
/// let loader = FileLoader::with_glob("files/*.txt")?;
/// ```
pub fn with_glob(
pattern: &str,
) -> Result<FileLoader<Result<PathBuf, FileLoaderError>>, FileLoaderError> {
let paths = glob(pattern)?;
Expand All @@ -92,11 +179,70 @@ impl<'a> FileLoader<'a, PathBuf> {
),
})
}

/// Creates a new `FileLoader` over the entries of a directory.
///
/// Entries are yielded exactly as `std::fs::read_dir` reports them: nothing is filtered,
/// so subdirectories appear alongside regular files, and the listing is not recursive.
///
/// # Errors
/// Returns an error if the directory itself cannot be read. A failure on an individual
/// entry does not abort the listing; it is yielded as an `Err` item instead.
///
/// # Example
/// Create a `FileLoader` for all entries that are in the directory "files".
///
/// ```rust,no_run
/// use rig::loaders::FileLoader;
///
/// let loader = FileLoader::with_dir("files")?;
/// # Ok::<(), Box<dyn std::error::Error>>(())
/// ```
pub fn with_dir(
    directory: &str,
) -> Result<FileLoader<Result<PathBuf, FileLoaderError>>, FileLoaderError> {
    Ok(FileLoader {
        // `entry?` converts a per-entry I/O error into a `FileLoaderError` item.
        iterator: Box::new(fs::read_dir(directory)?.map(|entry| Ok(entry?.path()))),
    })
}
}

impl<'a, State> FileLoader<'a, State> {
pub fn iter(self) -> Box<dyn Iterator<Item = State> + 'a> {
self.iterator
// Iterators for FileLoader

/// Owning iterator over the items of a `FileLoader`, produced by its `IntoIterator` impl.
pub struct IntoIter<'a, T> {
// The boxed iterator taken over from the consumed `FileLoader`.
iterator: Box<dyn Iterator<Item = T> + 'a>,
}

impl<'a, T> IntoIterator for FileLoader<'a, T> {
type Item = T;
type IntoIter = IntoIter<'a, T>;

fn into_iter(self) -> Self::IntoIter {
IntoIter {
iterator: self.iterator,
}
}
}

/// Delegates iteration to the boxed iterator taken from the `FileLoader`.
impl<'a, T> Iterator for IntoIter<'a, T> {
    type Item = T;

    fn next(&mut self) -> Option<Self::Item> {
        self.iterator.next()
    }

    /// Forwards the inner iterator's bounds; the default `(0, None)` would hide them
    /// from consumers such as `collect`, which use the hint to reserve capacity.
    fn size_hint(&self) -> (usize, Option<usize>) {
        self.iterator.size_hint()
    }
}

/// Borrowing iterator that wraps a `std::slice::Iter` and yields `&'a T`.
// NOTE(review): no constructor for this type is visible in this section; presumably it is
// meant to back an `iter()` method — confirm against the rest of the file.
pub struct Iter<'a, T> {
iterator: std::slice::Iter<'a, T>,
}

/// Delegates iteration to the wrapped slice iterator.
impl<'a, T> Iterator for Iter<'a, T> {
    type Item = &'a T;

    fn next(&mut self) -> Option<Self::Item> {
        self.iterator.next()
    }

    /// Forwards the slice iterator's exact bounds instead of the default `(0, None)`.
    fn size_hint(&self) -> (usize, Option<usize>) {
        self.iterator.size_hint()
    }
}

/// Mutably borrowing iterator that wraps a `std::slice::IterMut` and yields `&'a mut T`.
// NOTE(review): no constructor for this type is visible in this section; presumably it is
// meant to back an `iter_mut()` method — confirm against the rest of the file.
pub struct IterMut<'a, T> {
iterator: std::slice::IterMut<'a, T>,
}

/// Delegates iteration to the wrapped mutable slice iterator.
impl<'a, T> Iterator for IterMut<'a, T> {
    type Item = &'a mut T;

    fn next(&mut self) -> Option<Self::Item> {
        self.iterator.next()
    }

    /// Forwards the slice iterator's exact bounds instead of the default `(0, None)`.
    fn size_hint(&self) -> (usize, Option<usize>) {
        self.iterator.size_hint()
    }
}

Expand All @@ -120,12 +266,12 @@ mod tests {

let glob = temp.path().to_string_lossy().to_string() + "/*.txt";

let loader = FileLoader::new(&glob).unwrap();
let loader = FileLoader::with_glob(&glob).unwrap();
let mut actual = loader
.ignore_errors()
.read()
.ignore_errors()
.iter()
.into_iter()
.collect::<Vec<_>>();
let mut expected = vec!["foo".to_string(), "bar".to_string()];

Expand Down
5 changes: 5 additions & 0 deletions rig-core/src/loaders/mod.rs
Original file line number Diff line number Diff line change
@@ -1,4 +1,9 @@
// File-system loader module.
pub mod file;

// Re-export so callers can write `loaders::FileLoader`.
pub use file::FileLoader;

// PDF support is compiled only when the `pdf` cargo feature is enabled.
#[cfg(feature = "pdf")]
pub mod pdf;

// Re-export of the PDF loader, gated on the same feature.
#[cfg(feature = "pdf")]
pub use pdf::PdfFileLoader;
Loading

0 comments on commit 3429b4a

Please sign in to comment.