Skip to content

Commit

Permalink
feat: Resource and ResourceConfig (#144)
Browse files Browse the repository at this point in the history
  • Loading branch information
Autoparallel authored Nov 29, 2024
1 parent 3ca6a21 commit 30bf2eb
Show file tree
Hide file tree
Showing 18 changed files with 309 additions and 62 deletions.
2 changes: 1 addition & 1 deletion Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ repository = "https://github.com/autoparallel/learner"

[workspace.dependencies]
# local
learner = { path = "crates/learner", version = "=0.8.6" }
learner = { path = "crates/learner", version = "=0.9.0" }

# shared dependencies
chrono = { version = "0.4", features = ["serde"] }
Expand Down
2 changes: 1 addition & 1 deletion crates/learner/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ license.workspace = true
name = "learner"
readme.workspace = true
repository.workspace = true
version = "0.8.6"
version = "0.9.0"

[dependencies]
async-trait = { workspace = true }
Expand Down
11 changes: 11 additions & 0 deletions crates/learner/config/resource/thesis.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
type_name = "thesis"

[fields]
abstract = { type = "string", required = false }
author = { type = "array", items = "string", required = true }
committee = { type = "array", items = "string", required = false }
defense_date = { type = "string", format = "date-time", required = false }
department = { type = "string", required = false, default = "Computer Science" }
keywords = { type = "array", items = "string", required = false }
title = { type = "string", required = true }
university = { type = "string", required = true }
6 changes: 3 additions & 3 deletions crates/learner/src/database/instruction/add.rs
Original file line number Diff line number Diff line change
Expand Up @@ -18,8 +18,8 @@
//! ```no_run
//! use learner::{
//! database::{Add, Database, Query},
//! paper::Paper,
//! prelude::*,
//! resource::Paper,
//! Learner,
//! };
//!
Expand Down Expand Up @@ -98,7 +98,7 @@ impl<'a> Add<'a> {
///
/// ```no_run
/// # use learner::database::Add;
/// # use learner::paper::Paper;
/// # use learner::resource::Paper;
/// # use learner::Learner;
/// # async fn example() -> Result<(), Box<dyn std::error::Error>> {
/// # let learner = Learner::builder().build().await?;
Expand All @@ -125,7 +125,7 @@ impl<'a> Add<'a> {
///
/// ```no_run
/// # use learner::database::Add;
/// # use learner::paper::Paper;
/// # use learner::resource::Paper;
/// # use learner::Learner;
/// # async fn example() -> Result<(), Box<dyn std::error::Error>> {
/// # let learner = Learner::builder().build().await?;
Expand Down
2 changes: 1 addition & 1 deletion crates/learner/src/database/instruction/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -26,8 +26,8 @@
//! ```no_run
//! use learner::{
//! database::{Add, Database, Query, Remove},
//! paper::Paper,
//! prelude::*,
//! resource::Paper,
//! Learner,
//! };
//!
Expand Down
2 changes: 1 addition & 1 deletion crates/learner/src/database/instruction/query.rs
Original file line number Diff line number Diff line change
Expand Up @@ -165,7 +165,7 @@ impl<'a> Query<'a> {
///
/// ```no_run
/// # use learner::database::Query;
/// # use learner::{Learner, paper::Paper};
/// # use learner::{Learner, resource::Paper};
/// # async fn example() -> Result<(), Box<dyn std::error::Error>> {
/// let learner = Learner::builder().build().await?;
/// let paper = learner.retriever.get_paper("2301.07041").await?;
Expand Down
2 changes: 1 addition & 1 deletion crates/learner/src/database/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -30,8 +30,8 @@
//! ```no_run
//! use learner::{
//! database::{Add, Database, Query},
//! paper::Paper,
//! prelude::*,
//! resource::Paper,
//! Learner,
//! };
//!
Expand Down
31 changes: 30 additions & 1 deletion crates/learner/src/error.rs
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@
//! # Examples
//!
//! ```
//! use learner::{error::LearnerError, paper::Paper, Learner};
//! use learner::{error::LearnerError, resource::Paper, Learner};
//! // or `use learner::prelude::*` to bring in the error type
//!
//! # async fn example() -> Result<(), LearnerError> {
Expand Down Expand Up @@ -235,4 +235,33 @@ pub enum LearnerError {
/// ```
#[error("{0}")]
Config(String),

/// Errors when parsing or working with JSON data.
///
/// This error variant wraps errors from serde_json, which can occur during:
/// - Serialization of Rust types to JSON
/// - Deserialization of JSON to Rust types
/// - JSON value manipulation and transformation
///
/// Common scenarios include:
/// - Invalid JSON syntax
/// - Type mismatches during deserialization
/// - Missing required fields
/// - Numeric conversion failures
#[error(transparent)]
SerdeJson(#[from] serde_json::Error),

/// Indicates a resource failed to serialize into a valid structure.
///
/// This error occurs when attempting to serialize a resource type
/// into JSON and the result is not a simple object structure. This
/// typically happens when:
/// - The resource type contains complex nested structures
/// - The resource serializes to a JSON array instead of an object
/// - The resource serializes to a primitive value
///
/// The error helps ensure that resources maintain a flat, searchable
/// structure that can be properly stored and queried in the database.
#[error("A resource must serialize into a flat Rust struct or JSON object.")]
InvalidResource,
}
17 changes: 11 additions & 6 deletions crates/learner/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -37,8 +37,8 @@
//! ```no_run
//! use learner::{
//! database::{Add, OrderField, Query},
//! paper::Paper,
//! prelude::*,
//! resource::Paper,
//! Learner,
//! };
//!
Expand Down Expand Up @@ -155,7 +155,6 @@ use std::{
use async_trait::async_trait;
use chrono::{DateTime, Utc};
use lazy_static::lazy_static;
use paper::{Author, Paper};
use regex::Regex;
use reqwest::Url;
use serde::{Deserialize, Serialize};
Expand All @@ -169,10 +168,15 @@ pub mod retriever;
pub mod error;
pub mod format;
pub mod llm;
pub mod paper;
pub mod pdf;
pub mod resource;

use crate::{database::*, error::*, retriever::*};
use crate::{
database::*,
error::*,
resource::{Author, Paper},
retriever::*,
};

/// ArXiv default configuration
pub const ARXIV_CONFIG: &str = include_str!("../config/retrievers/arxiv.toml");
Expand All @@ -195,8 +199,8 @@ pub const IACR_CONFIG: &str = include_str!("../config/retrievers/iacr.toml");
/// ```no_run
/// use learner::{
/// database::{Add, Database},
/// paper::Paper,
/// prelude::*,
/// resource::Paper,
/// Learner,
/// };
///
Expand All @@ -211,7 +215,8 @@ pub const IACR_CONFIG: &str = include_str!("../config/retrievers/iacr.toml");
/// ```
pub mod prelude {
pub use crate::{
database::DatabaseInstruction, error::LearnerError, retriever::ResponseProcessor,
database::DatabaseInstruction, error::LearnerError, resource::Resource,
retriever::ResponseProcessor,
};
}

Expand Down
195 changes: 195 additions & 0 deletions crates/learner/src/resource/mod.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,195 @@
//! Resource abstraction and configuration for the learner library.
//!
//! This module provides the core abstractions for working with different types of academic
//! and research resources. It defines:
//!
//! - A [`Resource`] trait that all resource types must implement
//! - A flexible [`ResourceConfig`] for runtime-configured resource types
//! - Common utility types and functions for resource management
//!
//! The design allows for both statically defined resource types (like papers and books)
//! and dynamically configured resources that can be defined through configuration files.
//!
//! # Examples
//!
//! ```rust,no_run
//! use learner::{
//! resource::{Paper, Resource, ResourceConfig},
//! Learner,
//! };
//! use serde_json::json;
//!
//! # async fn example() -> Result<(), Box<dyn std::error::Error>> {
//! // Using a built-in resource type
//! let learner = Learner::builder().build().await?;
//! let paper = learner.retriever.get_paper("2301.07041").await?;
//!
//! // Access resource fields
//! let fields = paper.fields()?;
//! println!("Paper type: {}", paper.resource_type());
//!
//! // Or create a custom resource type at runtime
//! let mut fields = serde_json::Map::new();
//! fields.insert("title".into(), json!("My Thesis"));
//! fields.insert("university".into(), json!("Tech University"));
//!
//! let thesis = ResourceConfig { type_name: "thesis".to_string(), fields };
//! # Ok(())
//! # }
//! ```
use serde_json::{Map, Value};

use super::*;

mod paper;
mod shared;

pub use paper::*;
pub use shared::*;

/// Core trait that defines the behavior of a resource in the system.
///
/// This trait provides a common interface for all resource types, whether they are
/// statically defined (like [`Paper`]) or dynamically configured through [`ResourceConfig`].
/// It requires that implementing types can be serialized and deserialized, which enables
/// persistent storage and retrieval.
///
/// The trait provides two key capabilities:
/// - Identification of the resource type
/// - Access to the resource's fields in a uniform way
///
/// # Examples
///
/// ```rust
/// # use serde::{Serialize, Deserialize};
/// # use learner::resource::Resource;
/// #[derive(Serialize, Deserialize)]
/// struct Book {
/// title: String,
/// author: String,
/// isbn: String,
/// }
///
/// impl Resource for Book {
/// fn resource_type(&self) -> String { "book".to_string() }
/// }
/// ```
pub trait Resource: Serialize + for<'de> Deserialize<'de> {
/// Returns the type identifier for this resource.
///
/// This identifier is used to distinguish between different types of resources
/// in the system. For example, "paper", "book", or "thesis".
fn resource_type(&self) -> String;

/// Returns a map of field names to their values for this resource.
///
/// This method provides a uniform way to access a resource's fields regardless
/// of the concrete type. The default implementation uses serde to serialize
/// the resource to JSON and extract its fields.
///
/// # Errors
///
/// Returns [`LearnerError::InvalidResource`] if the resource cannot be serialized
/// to a JSON object.
fn fields(&self) -> Result<Map<String, Value>> {
serde_json::to_value(self)?.as_object().cloned().ok_or_else(|| LearnerError::InvalidResource)
}
}

/// A resource whose type name and fields are supplied at runtime.
///
/// Instead of defining a dedicated Rust type, a `ResourceConfig` carries:
/// - a type identifier string (`type_name`)
/// - a JSON map keyed by field name (`fields`)
///
/// It can be built directly in code (as in the example below) or deserialized from a
/// configuration file. When it is deserialized from a file such as
/// `config/resource/thesis.toml`, each entry under `[fields]` is an inline table
/// describing the field (`type`, `required`, ...), so the map then holds those
/// definition tables rather than concrete values.
///
/// # Examples
///
/// ```rust
/// use learner::resource::ResourceConfig;
/// use serde_json::{json, Map};
///
/// let mut fields = Map::new();
/// fields.insert("title".into(), json!("Understanding Type Systems"));
/// fields.insert("university".into(), json!("Tech University"));
///
/// let thesis = ResourceConfig { type_name: "thesis".to_string(), fields };
/// assert_eq!(thesis.clone(), thesis);
/// ```
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub struct ResourceConfig {
    /// The type identifier reported by [`Resource::resource_type`] for this configuration.
    pub type_name: String,
    /// JSON map keyed by field name; returned as-is (cloned) by [`Resource::fields`].
    pub fields: Map<String, Value>,
}

impl Resource for ResourceConfig {
    /// Reports the configured `type_name` as this resource's type identifier.
    fn resource_type(&self) -> String {
        self.type_name.to_owned()
    }

    /// Returns a copy of the stored field map.
    ///
    /// This overrides the trait's default, so the map is returned directly rather than
    /// being produced by serializing the whole struct. This implementation never fails.
    fn fields(&self) -> Result<Map<String, Value>> {
        let snapshot = self.fields.clone();
        Ok(snapshot)
    }
}

#[cfg(test)]
mod tests {
    use serde_json::json;

    use super::*;

    /// Builds a thesis `ResourceConfig` in code and checks the `Resource` accessors and a
    /// JSON serialization round trip.
    #[test]
    fn test_thesis_resource() -> Result<()> {
        // Create a thesis resource
        let mut fields = Map::new();
        fields.insert("title".into(), json!("Understanding Quantum Computing Effects"));
        fields.insert("author".into(), json!(["Alice Researcher", "Bob Scientist"]));
        fields.insert("university".into(), json!("Tech University"));
        fields.insert("department".into(), json!("Computer Science"));
        fields.insert("defense_date".into(), json!("2024-06-15T14:00:00Z"));
        fields.insert(
            "committee".into(),
            json!(["Prof. Carol Chair", "Dr. David Member", "Dr. Eve External"]),
        );
        fields.insert(
            "keywords".into(),
            json!(["quantum computing", "decoherence", "error correction"]),
        );

        let thesis = ResourceConfig { type_name: "thesis".to_string(), fields };

        // Test resource_type
        assert_eq!(thesis.resource_type(), "thesis");

        // Test fields method
        let fields = thesis.fields()?;

        // Verify we can access specific fields with proper types
        assert!(fields["title"].is_string());
        assert_eq!(fields["author"].as_array().map(Vec::len), Some(2));

        // Test JSON serialization/deserialization roundtrip. Compare the type name and the
        // whole field map so that a loss in any field is caught, not only in `title`.
        let serialized = serde_json::to_string(&thesis)?;
        let deserialized: ResourceConfig = serde_json::from_str(&serialized)?;
        assert_eq!(thesis.type_name, deserialized.type_name);
        assert_eq!(thesis.fields, deserialized.fields);

        Ok(())
    }

    /// Loads the bundled thesis configuration and checks that its type name and the
    /// required field definitions are present.
    #[test]
    fn test_thesis_from_toml() -> Result<()> {
        let toml_str = include_str!("../../config/resource/thesis.toml");
        let config: ResourceConfig = toml::from_str(toml_str)?;

        assert_eq!(config.resource_type(), "thesis");

        // Test that we can access the field definitions
        let fields = config.fields()?;
        for key in ["title", "author", "university"] {
            assert!(fields.contains_key(key), "missing field definition: {key}");
        }

        Ok(())
    }
}
Loading

0 comments on commit 30bf2eb

Please sign in to comment.