-
Notifications
You must be signed in to change notification settings - Fork 2
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
1 parent
30bf2eb
commit dd5e764
Showing
10 changed files
with
255 additions
and
12 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
File renamed without changes.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,13 @@ | ||
[package] | ||
edition = "2021" | ||
name = "learner-sdk" | ||
version = "0.1.0" | ||
|
||
[dependencies] | ||
clap = { workspace = true } | ||
learner = { workspace = true } | ||
tempfile = { workspace = true } | ||
tokio = { workspace = true } | ||
toml = { workspace = true } | ||
tracing = { workspace = true } | ||
tracing-subscriber = { workspace = true } |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,65 @@ | ||
mod validate; | ||
|
||
use std::path::PathBuf; | ||
|
||
use clap::{Parser, Subcommand}; | ||
use learner::prelude::*; | ||
use tracing::{debug, error, info, warn}; | ||
|
||
// Top-level command-line interface for the SDK binary, built with clap's derive `Parser`. | ||
// `main` calls `LearnerSdk::parse()` and dispatches on the `command` field. | ||
// Plain `//` comments are used so the text clap generates for the CLI is not affected. | ||
#[derive(Parser)] | ||
#[command(author, version, about, long_about = None)] | ||
struct LearnerSdk { | ||
// The subcommand chosen by the user; see `Commands` for the available variants. | ||
#[command(subcommand)] | ||
command: Commands, | ||
} | ||
|
||
// Subcommands accepted by the CLI; each variant carries the arguments for one validation mode. | ||
// The existing `///` comments are left untouched because clap's derive uses them as help text. | ||
#[derive(Subcommand)] | ||
enum Commands { | ||
/// Validate a resource config | ||
ValidateResource { | ||
/// Path to the configuration file | ||
path: PathBuf, | ||
}, | ||
/// Validate a retriever config for an optional given input | ||
ValidateRetriever { | ||
/// Path to the configuration file | ||
path: PathBuf, | ||
|
||
// Optional: `main` forwards this as-is (possibly `None`) to `validate::validate_retriever`. | ||
/// Identifier or URL | ||
input: Option<String>, | ||
}, | ||
} | ||
|
||
#[tokio::main] | ||
async fn main() { | ||
tracing_subscriber::fmt() | ||
.without_time() | ||
.with_file(false) | ||
.with_line_number(false) | ||
.with_target(false) | ||
.with_max_level(tracing::Level::TRACE) | ||
.init(); | ||
|
||
let cli = LearnerSdk::parse(); | ||
|
||
match &cli.command { | ||
Commands::ValidateRetriever { path, input } => { | ||
info!("Validating retriever..."); | ||
if !path.exists() { | ||
error!("Path to retriever config was invalid.\nPath used: {path:?}"); | ||
return; | ||
} | ||
debug!("Validating retriever config at {:?}", path); | ||
validate::validate_retriever(path, input).await; | ||
}, | ||
Commands::ValidateResource { path } => { | ||
info!("Validating resource..."); | ||
if !path.exists() { | ||
error!("Path to resource config was invalid.\nPath used: {path:?}"); | ||
return; | ||
} | ||
debug!("Validating resource config at {:?}", path); | ||
validate::validate_resource(path); | ||
}, | ||
} | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,114 @@ | ||
use std::fs::read_to_string; | ||
|
||
use learner::{ | ||
resource::ResourceConfig, | ||
retriever::{ResponseFormat, RetrieverConfig}, | ||
}; | ||
|
||
use super::*; | ||
|
||
pub fn validate_resource(path: &PathBuf) { | ||
let config_str = match read_to_string(path) { | ||
Ok(str) => str, | ||
Err(e) => { | ||
error!("Failed to read config to string due to: {e:?}"); | ||
return; | ||
}, | ||
}; | ||
|
||
let resource: ResourceConfig = match toml::from_str(&config_str) { | ||
Ok(config) => config, | ||
Err(e) => { | ||
error!("Failed to parse config to string due to: {e:?}"); | ||
return; | ||
}, | ||
}; | ||
|
||
info!("Resource type: {}", resource.type_name); | ||
|
||
// Check all required fields are present | ||
debug!("All config fields are:\n{:#?}", resource.fields()); | ||
} | ||
|
||
pub async fn validate_retriever(path: &PathBuf, input: &Option<String>) { | ||
let config_str = match read_to_string(path) { | ||
Ok(str) => str, | ||
Err(e) => { | ||
error!("Failed to read config to string due to: {e:?}"); | ||
return; | ||
}, | ||
}; | ||
|
||
let retriever: RetrieverConfig = match toml::from_str(&config_str) { | ||
Ok(config) => config, | ||
Err(e) => { | ||
error!("Failed to parse config to string due to: {e:?}"); | ||
return; | ||
}, | ||
}; | ||
|
||
match &retriever.response_format { | ||
ResponseFormat::Xml(config) => { | ||
debug!("Retriever is configured for: XML\n{config:#?}") | ||
}, | ||
ResponseFormat::Json(config) => { | ||
debug!("Retriever is configured for: JSON\n{config:#?}") | ||
}, | ||
} | ||
|
||
if let Some(input) = input { | ||
info!("Attempting to match against pattern..."); | ||
match retriever.extract_identifier(input) { | ||
Ok(identifier) => info!("Retriever extracted input into: {identifier}"), | ||
Err(e) => { | ||
error!("Retriever failed to extract input due to: {e:?}"); | ||
return; | ||
}, | ||
} | ||
|
||
info!("Attempting to fetch paper using retriever..."); | ||
let paper = match retriever.retrieve_paper(input).await { | ||
Ok(paper) => { | ||
info!("Paper retrieved!\n{paper:#?}"); | ||
paper | ||
}, | ||
Err(e) => { | ||
error!("Retriever failed to retriever paper due to: {e:?}"); | ||
return; | ||
}, | ||
}; | ||
|
||
if paper.pdf_url.is_some() { | ||
info!("Attempting to download associated pdf"); | ||
let tempdir = tempfile::tempdir().unwrap(); | ||
match paper.download_pdf(tempdir.path()).await { | ||
Ok(filename) => { | ||
let pdf_filepath = tempdir.path().join(filename); | ||
if pdf_filepath.exists() { | ||
let bytes = std::fs::read(path).unwrap(); | ||
if bytes.is_empty() { | ||
error!("PDF download was empty."); | ||
} else { | ||
info!("Non-empty PDF downloaded successfully."); | ||
} | ||
} else { | ||
error!("PDF path did not end up getting written.") | ||
} | ||
}, | ||
Err(e) => { | ||
error!("PDF was unable to be downloaded due to: {e:?}") | ||
}, | ||
} | ||
} else { | ||
warn!( | ||
"PDF URL was not determined. Please check your configuration against the server response." | ||
); | ||
} | ||
} else { | ||
warn!( | ||
"No input string provided to further debug your `RetrieverConfig`. If you want to test \ | ||
identifier pattern matching and online fetching, please pass in an input string with an \ | ||
additional input, e.g., `2301.07041`." | ||
); | ||
} | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters