-
Notifications
You must be signed in to change notification settings - Fork 1
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
3 changed files
with
151 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,144 @@ | ||
use crate::{ | ||
db::{self, DatabaseConnection}, | ||
stelae::{archive::Archive, types::repositories::Repository}, | ||
}; | ||
use sophia::{api::{ns::{rdf, NsTerm}, prelude::*, term::SimpleTerm}, turtle::parser::turtle::TurtleParser}; | ||
use sophia::inmem::graph::LightGraph; | ||
use sophia::xml::parser; | ||
use sophia::turtle::serializer::nt::NtSerializer; | ||
use sophia::{api::ns::Namespace, inmem::graph::FastGraph}; | ||
use std::path::{Path, PathBuf}; | ||
use walkdir::WalkDir; | ||
/// Inserts changes from the archive into the database | ||
/// | ||
/// # Errors | ||
/// Errors if the changes cannot be inserted into the archive | ||
// const NAMESPACE_URL: &str = "https://open.law/us/ngo/oll/_ontology/v0.1/ontology.owl#"; | ||
// const OLL: Namespace<&str> = Namespace::new(NAMESPACE_URL).unwrap(); | ||
|
||
// const DOCUMENT_VERSION: NsTerm = OLL.get("DocumentVersion").unwrap(); | ||
|
||
#[actix_web::main] | ||
pub async fn insert( | ||
raw_archive_path: &str, | ||
archive_path: PathBuf, | ||
stele: Option<String>, | ||
) -> std::io::Result<()> { | ||
let conn = match db::init::connect(&archive_path).await { | ||
Ok(conn) => conn, | ||
Err(err) => { | ||
tracing::error!( | ||
"error: could not connect to database. Confirm that DATABASE_URL env var is set correctly." | ||
); | ||
tracing::error!("Error: {:?}", err); | ||
std::process::exit(1); | ||
} | ||
}; | ||
if let Some(stele) = stele { | ||
insert_changes_single_stele()?; | ||
} else { | ||
insert_changes_archive(&conn, raw_archive_path, &archive_path).unwrap_or_else(|err| { | ||
tracing::error!("Failed to insert changes into archive"); | ||
tracing::error!("{:?}", err); | ||
}); | ||
} | ||
Ok(()) | ||
} | ||
|
||
fn insert_changes_single_stele() -> std::io::Result<()> { | ||
Ok(()) | ||
} | ||
|
||
/// Insert changes from the archive into the database | ||
fn insert_changes_archive( | ||
conn: &DatabaseConnection, | ||
raw_archive_path: &str, | ||
archive_path: &Path, | ||
) -> anyhow::Result<()> { | ||
let archive = Archive::parse( | ||
archive_path.to_path_buf(), | ||
&PathBuf::from(raw_archive_path), | ||
false, | ||
)?; | ||
|
||
for (name, mut stele) in archive.stelae { | ||
if let Some(repositories) = stele.get_repositories()? { | ||
let Some(rdf_repo) = repositories.get_rdf_repository() else { | ||
continue; | ||
}; | ||
let rdf_repo_path = archive_path.to_path_buf().join(&rdf_repo.name); | ||
if !rdf_repo_path.exists() { | ||
anyhow::bail!( | ||
"RDF repository should exist on disk but not found: {}", | ||
rdf_repo_path.display() | ||
); | ||
} | ||
insert_changes_from_rdf_repository(conn, rdf_repo_path, &name, rdf_repo)?; | ||
} | ||
} | ||
Ok(()) | ||
} | ||
|
||
/// Insert changes from the RDF repository into the database | ||
fn insert_changes_from_rdf_repository( | ||
conn: &DatabaseConnection, | ||
rdf_repo_path: PathBuf, | ||
name: &str, | ||
rdf_repo: &Repository, | ||
) -> anyhow::Result<()> { | ||
tracing::info!("Inserting changes from RDF repository: {}", name); | ||
tracing::info!("RDF repository path: {}", rdf_repo_path.display()); | ||
|
||
// let response = reqwest::get(NAMESPACE_URL).await?.text().await?; | ||
let mut graph = FastGraph::new(); | ||
|
||
for entry in WalkDir::new(&rdf_repo_path) { | ||
match entry { | ||
Ok(entry) if is_rdf(&entry) => { | ||
tracing::debug!("Parsing file: {:?}", entry.path()); | ||
let file = std::fs::File::open(entry.path())?; | ||
let reader = std::io::BufReader::new(file); | ||
parser::parse_bufread(reader).add_to_graph(&mut graph)?; | ||
} | ||
Ok(entry) => { | ||
tracing::debug!("Skipping non-RDF file: {:?}", entry.path()); | ||
continue; | ||
} | ||
Err(err) => { | ||
tracing::error!("Error reading file: {:?}", err); | ||
} | ||
} | ||
} | ||
let namespace_url = "https://open.law/us/ngo/oll/_ontology/v0.1/ontology.owl#"; | ||
let oll: Namespace<&str> = Namespace::new(namespace_url).unwrap(); | ||
|
||
let document_version: NsTerm = oll.get("DocumentVersion").unwrap(); | ||
let doc_id = oll.get("docId").unwrap(); | ||
// println!("Graph: {:?}", graph); | ||
let documents = graph.triples_matching(Any, Any, [document_version]); | ||
|
||
for triple in documents { | ||
let triple = triple.unwrap(); | ||
let document = triple.s(); | ||
|
||
let mut doc_id_triples = graph.triples_matching( | ||
[document], | ||
[doc_id], | ||
Any, | ||
); | ||
for t in doc_id_triples { | ||
// println!("t: {:?} {:?} {:?}", t?.s(), t?.p(), t?.o()); | ||
let object = t?.o(); | ||
if let SimpleTerm::LiteralDatatype(value, _) = object { | ||
dbg!(&value); | ||
} | ||
|
||
} | ||
} | ||
Ok(()) | ||
} | ||
|
||
/// Check if the entry is an RDF file | ||
fn is_rdf(entry: &walkdir::DirEntry) -> bool { | ||
entry.path().extension() == Some("rdf".as_ref()) | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,2 @@ | ||
pub mod changes; | ||
pub mod rdf_namespaces; |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,5 @@ | ||
// use sophia::api::ns::Namespace; | ||
|
||
// const NAMESPACE_URL: &str = "https://open.law/us/ngo/oll/_ontology/v0.1/ontology.owl#"; | ||
|
||
// pub const OLL: Namespace = Namespace::new(NAMESPACE_URL).unwrap(); |