diff --git a/.dockerignore b/.dockerignore index f229611505534..a6274ffa46194 100644 --- a/.dockerignore +++ b/.dockerignore @@ -31,7 +31,7 @@ !api/doc/ !crates/indexer/migrations/**/*.sql !ecosystem/indexer-grpc/indexer-grpc-parser/migrations/**/*.sql -!ecosystem/nft-metadata-crawler-parser/migrations/**/*.sql +!ecosystem/nft-metadata-crawler/migrations/**/*.sql !rust-toolchain.toml !scripts/ !terraform/helm/aptos-node/ diff --git a/Cargo.lock b/Cargo.lock index 6a4b1d2d960cd..b46d9b2786968 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -3082,7 +3082,7 @@ dependencies = [ ] [[package]] -name = "aptos-nft-metadata-crawler-parser" +name = "aptos-nft-metadata-crawler" version = "0.1.0" dependencies = [ "anyhow", diff --git a/Cargo.toml b/Cargo.toml index 6e9d7aa06f4db..c7db5300cbd0f 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -125,7 +125,7 @@ members = [ "ecosystem/indexer-grpc/indexer-test-transactions", "ecosystem/indexer-grpc/indexer-transaction-generator", "ecosystem/indexer-grpc/transaction-filter", - "ecosystem/nft-metadata-crawler-parser", + "ecosystem/nft-metadata-crawler", "ecosystem/node-checker", "ecosystem/node-checker/fn-check-client", "execution/block-partitioner", @@ -392,7 +392,7 @@ aptos-network-benchmark = { path = "network/benchmark" } aptos-network-builder = { path = "network/builder" } aptos-network-checker = { path = "crates/aptos-network-checker" } aptos-network-discovery = { path = "network/discovery" } -aptos-nft-metadata-crawler-parser = { path = "ecosystem/nft-metadata-crawler-parser" } +aptos-nft-metadata-crawler = { path = "ecosystem/nft-metadata-crawler" } aptos-node = { path = "aptos-node" } aptos-node-checker = { path = "ecosystem/node-checker" } aptos-node-identity = { path = "crates/aptos-node-identity" } diff --git a/docker/builder/build-indexer.sh b/docker/builder/build-indexer.sh index 0bde16450815e..95bebee0a5de7 100755 --- a/docker/builder/build-indexer.sh +++ b/docker/builder/build-indexer.sh @@ -15,7 +15,7 @@ cargo build --locked --profile=$PROFILE \ -p aptos-indexer-grpc-cache-worker \ -p aptos-indexer-grpc-file-store \ -p aptos-indexer-grpc-data-service \ - -p aptos-nft-metadata-crawler-parser \ + -p aptos-nft-metadata-crawler \ -p aptos-indexer-grpc-file-store-backfiller \ "$@" @@ -24,7 +24,7 @@ BINS=( aptos-indexer-grpc-cache-worker aptos-indexer-grpc-file-store aptos-indexer-grpc-data-service - aptos-nft-metadata-crawler-parser + aptos-nft-metadata-crawler aptos-indexer-grpc-file-store-backfiller ) diff --git a/docker/builder/nft-metadata-crawler.Dockerfile b/docker/builder/nft-metadata-crawler.Dockerfile index 52eb69b8ec43a..1b1d6998740ad 100644 --- a/docker/builder/nft-metadata-crawler.Dockerfile +++ b/docker/builder/nft-metadata-crawler.Dockerfile @@ -16,7 +16,7 @@ RUN --mount=type=cache,target=/var/cache/apt,sharing=locked \ libpq-dev \ curl -COPY --link --from=indexer-builder /aptos/dist/aptos-nft-metadata-crawler-parser /usr/local/bin/aptos-nft-metadata-crawler-parser +COPY --link --from=indexer-builder /aptos/dist/aptos-nft-metadata-crawler /usr/local/bin/aptos-nft-metadata-crawler # The health check port EXPOSE 8080 diff --git a/ecosystem/nft-metadata-crawler-parser/src/config.rs b/ecosystem/nft-metadata-crawler-parser/src/config.rs deleted file mode 100644 index ed21836d1ce02..0000000000000 --- a/ecosystem/nft-metadata-crawler-parser/src/config.rs +++ /dev/null @@ -1,160 +0,0 @@ -// Copyright © Aptos Foundation -// SPDX-License-Identifier: Apache-2.0 - -use crate::{ - asset_uploader::AssetUploaderContext, - parser::ParserContext, - utils::{ - constants::{ - DEFAULT_IMAGE_QUALITY, DEFAULT_MAX_FILE_SIZE_BYTES, DEFAULT_MAX_IMAGE_DIMENSIONS, - DEFAULT_MAX_NUM_PARSE_RETRIES, - }, - database::{establish_connection_pool, run_migrations}, - }, -}; -use aptos_indexer_grpc_server_framework::RunnableConfig; -use axum::Router; -use diesel::{ - r2d2::{ConnectionManager, Pool}, - PgConnection, -}; -use enum_dispatch::enum_dispatch; -use serde::{Deserialize, Serialize}; -use tracing::info; - -/// Trait for building a router for axum -#[enum_dispatch] -pub trait Server: Send + Sync { - fn build_router(&self) -> Router; -} - -/// Required account data and auth keys for Cloudflare -#[derive(Clone, Debug, Deserialize, Serialize)] -#[serde(deny_unknown_fields)] -pub struct AssetUploaderConfig { - /// Cloudflare API key - pub cloudflare_auth_key: String, - /// Cloudflare Account ID provided at the images home page used to authenticate requests - pub cloudflare_account_id: String, - /// Cloudflare Account Hash provided at the images home page used for generating the CDN image URLs - pub cloudflare_account_hash: String, - /// Cloudflare Image Delivery URL prefix provided at the images home page used for generating the CDN image URLs - pub cloudflare_image_delivery_prefix: String, - /// In addition to on the fly transformations, Cloudflare images can be returned in preset variants. This is the default variant used with the saved CDN image URLs. - pub cloudflare_default_variant: String, -} - -#[derive(Clone, Debug, Deserialize, Serialize)] -#[serde(deny_unknown_fields)] -pub struct ParserConfig { - pub google_application_credentials: Option, - pub bucket: String, - pub cdn_prefix: String, - pub ipfs_prefix: String, - pub ipfs_auth_key: Option, - #[serde(default = "NFTMetadataCrawlerConfig::default_max_file_size_bytes")] - pub max_file_size_bytes: u32, - #[serde(default = "NFTMetadataCrawlerConfig::default_image_quality")] - pub image_quality: u8, // Quality up to 100 - #[serde(default = "NFTMetadataCrawlerConfig::default_max_image_dimensions")] - pub max_image_dimensions: u32, - #[serde(default)] - pub ack_parsed_uris: bool, - #[serde(default)] - pub uri_blacklist: Vec, -} - -#[derive(Debug, Clone, Serialize, Deserialize)] -#[serde(tag = "type")] -pub enum ServerConfig { - Parser(ParserConfig), - AssetUploader(AssetUploaderConfig), -} - -/// Structs to hold config from YAML -#[derive(Clone, Debug, Deserialize, Serialize)] -#[serde(deny_unknown_fields)] -pub struct NFTMetadataCrawlerConfig { - pub database_url: String, - #[serde(default = "NFTMetadataCrawlerConfig::default_max_num_parse_retries")] - pub max_num_parse_retries: i32, - pub server_port: u16, - pub server_config: ServerConfig, -} - -impl NFTMetadataCrawlerConfig { - pub const fn default_max_file_size_bytes() -> u32 { - DEFAULT_MAX_FILE_SIZE_BYTES - } - - pub const fn default_image_quality() -> u8 { - DEFAULT_IMAGE_QUALITY - } - - pub const fn default_max_image_dimensions() -> u32 { - DEFAULT_MAX_IMAGE_DIMENSIONS - } - - pub const fn default_max_num_parse_retries() -> i32 { - DEFAULT_MAX_NUM_PARSE_RETRIES - } -} - -#[derive(Clone)] -#[enum_dispatch(Server)] -pub enum ServerContext { - Parser(ParserContext), - AssetUploader(AssetUploaderContext), -} - -impl ServerConfig { - pub async fn build_context( - &self, - pool: Pool>, - max_num_retries: i32, - ) -> ServerContext { - match self { - ServerConfig::Parser(parser_config) => ServerContext::Parser( - ParserContext::new(parser_config.clone(), pool, max_num_retries).await, - ), - ServerConfig::AssetUploader(asset_uploader_config) => ServerContext::AssetUploader( - AssetUploaderContext::new(asset_uploader_config.clone(), pool), - ), - } - } -} - -#[async_trait::async_trait] -impl RunnableConfig for NFTMetadataCrawlerConfig { - /// Main driver function that establishes a connection to Pubsub and parses the Pubsub entries in parallel - async fn run(&self) -> anyhow::Result<()> { - info!( - "[NFT Metadata Crawler] Starting parser with config: {:?}", - self - ); - - info!("[NFT Metadata Crawler] Connecting to database"); - let pool = establish_connection_pool(&self.database_url); - info!("[NFT Metadata Crawler] Database connection successful"); - - info!("[NFT Metadata Crawler] Running migrations"); - run_migrations(&pool); - info!("[NFT Metadata Crawler] Finished migrations"); - - // Create request context - let context = self - .server_config - .build_context(pool, self.max_num_parse_retries) - .await; - let listener = tokio::net::TcpListener::bind(format!("0.0.0.0:{}", self.server_port)) - .await - .expect("Failed to bind TCP listener"); - axum::serve(listener, context.build_router()).await.unwrap(); - - Ok(()) - } - - fn get_server_name(&self) -> String { - "parser".to_string() - } -} diff --git a/ecosystem/nft-metadata-crawler-parser/.gitignore b/ecosystem/nft-metadata-crawler/.gitignore similarity index 100% rename from ecosystem/nft-metadata-crawler-parser/.gitignore rename to ecosystem/nft-metadata-crawler/.gitignore diff --git a/ecosystem/nft-metadata-crawler-parser/Cargo.toml b/ecosystem/nft-metadata-crawler/Cargo.toml similarity index 92% rename from ecosystem/nft-metadata-crawler-parser/Cargo.toml rename to ecosystem/nft-metadata-crawler/Cargo.toml index 21aa48782c2f8..0301bd8944289 100644 --- a/ecosystem/nft-metadata-crawler-parser/Cargo.toml +++ b/ecosystem/nft-metadata-crawler/Cargo.toml @@ -1,6 +1,6 @@ [package] -name = "aptos-nft-metadata-crawler-parser" -description = "NFT Metadata Crawler Parser service." +name = "aptos-nft-metadata-crawler" +description = "NFT Metadata Crawler related services." version = "0.1.0" # Workspace inherited keys diff --git a/ecosystem/nft-metadata-crawler-parser/diesel.toml b/ecosystem/nft-metadata-crawler/diesel.toml similarity index 100% rename from ecosystem/nft-metadata-crawler-parser/diesel.toml rename to ecosystem/nft-metadata-crawler/diesel.toml diff --git a/ecosystem/nft-metadata-crawler-parser/migrations/00000000000000_diesel_initial_setup/down.sql b/ecosystem/nft-metadata-crawler/migrations/00000000000000_diesel_initial_setup/down.sql similarity index 100% rename from ecosystem/nft-metadata-crawler-parser/migrations/00000000000000_diesel_initial_setup/down.sql rename to ecosystem/nft-metadata-crawler/migrations/00000000000000_diesel_initial_setup/down.sql diff --git a/ecosystem/nft-metadata-crawler-parser/migrations/00000000000000_diesel_initial_setup/up.sql b/ecosystem/nft-metadata-crawler/migrations/00000000000000_diesel_initial_setup/up.sql similarity index 100% rename from ecosystem/nft-metadata-crawler-parser/migrations/00000000000000_diesel_initial_setup/up.sql rename to ecosystem/nft-metadata-crawler/migrations/00000000000000_diesel_initial_setup/up.sql diff --git a/ecosystem/nft-metadata-crawler-parser/migrations/2023-09-08-001532_create_tables/down.sql b/ecosystem/nft-metadata-crawler/migrations/2023-09-08-001532_create_tables/down.sql similarity index 100% rename from ecosystem/nft-metadata-crawler-parser/migrations/2023-09-08-001532_create_tables/down.sql rename to ecosystem/nft-metadata-crawler/migrations/2023-09-08-001532_create_tables/down.sql diff --git a/ecosystem/nft-metadata-crawler-parser/migrations/2023-09-08-001532_create_tables/up.sql b/ecosystem/nft-metadata-crawler/migrations/2023-09-08-001532_create_tables/up.sql similarity index 100% rename from ecosystem/nft-metadata-crawler-parser/migrations/2023-09-08-001532_create_tables/up.sql rename to ecosystem/nft-metadata-crawler/migrations/2023-09-08-001532_create_tables/up.sql diff --git a/ecosystem/nft-metadata-crawler-parser/migrations/2024-01-31-221845_add_not_parsable_column/down.sql b/ecosystem/nft-metadata-crawler/migrations/2024-01-31-221845_add_not_parsable_column/down.sql similarity index 100% rename from ecosystem/nft-metadata-crawler-parser/migrations/2024-01-31-221845_add_not_parsable_column/down.sql rename to ecosystem/nft-metadata-crawler/migrations/2024-01-31-221845_add_not_parsable_column/down.sql diff --git a/ecosystem/nft-metadata-crawler-parser/migrations/2024-01-31-221845_add_not_parsable_column/up.sql b/ecosystem/nft-metadata-crawler/migrations/2024-01-31-221845_add_not_parsable_column/up.sql similarity index 100% rename from ecosystem/nft-metadata-crawler-parser/migrations/2024-01-31-221845_add_not_parsable_column/up.sql rename to ecosystem/nft-metadata-crawler/migrations/2024-01-31-221845_add_not_parsable_column/up.sql diff --git a/ecosystem/nft-metadata-crawler-parser/migrations/2024-02-08-013147_add_last_transaction_version/down.sql b/ecosystem/nft-metadata-crawler/migrations/2024-02-08-013147_add_last_transaction_version/down.sql similarity index 100% rename from ecosystem/nft-metadata-crawler-parser/migrations/2024-02-08-013147_add_last_transaction_version/down.sql rename to ecosystem/nft-metadata-crawler/migrations/2024-02-08-013147_add_last_transaction_version/down.sql diff --git a/ecosystem/nft-metadata-crawler-parser/migrations/2024-02-08-013147_add_last_transaction_version/up.sql b/ecosystem/nft-metadata-crawler/migrations/2024-02-08-013147_add_last_transaction_version/up.sql similarity index 100% rename from ecosystem/nft-metadata-crawler-parser/migrations/2024-02-08-013147_add_last_transaction_version/up.sql rename to ecosystem/nft-metadata-crawler/migrations/2024-02-08-013147_add_last_transaction_version/up.sql diff --git a/ecosystem/nft-metadata-crawler/src/asset_uploader/config.rs b/ecosystem/nft-metadata-crawler/src/asset_uploader/config.rs new file mode 100644 index 0000000000000..3d019f4899f21 --- /dev/null +++ b/ecosystem/nft-metadata-crawler/src/asset_uploader/config.rs @@ -0,0 +1,20 @@ +// Copyright © Aptos Foundation +// SPDX-License-Identifier: Apache-2.0 + +use serde::{Deserialize, Serialize}; + +/// Required account data and auth keys for Cloudflare +#[derive(Clone, Debug, Deserialize, Serialize)] +#[serde(deny_unknown_fields)] +pub struct AssetUploaderConfig { + /// Cloudflare API key + pub cloudflare_auth_key: String, + /// Cloudflare Account ID provided at the images home page used to authenticate requests + pub cloudflare_account_id: String, + /// Cloudflare Account Hash provided at the images home page used for generating the CDN image URLs + pub cloudflare_account_hash: String, + /// Cloudflare Image Delivery URL prefix provided at the images home page used for generating the CDN image URLs + pub cloudflare_image_delivery_prefix: String, + /// In addition to on the fly transformations, Cloudflare images can be returned in preset variants. This is the default variant used with the saved CDN image URLs. + pub cloudflare_default_variant: String, +} diff --git a/ecosystem/nft-metadata-crawler-parser/src/asset_uploader.rs b/ecosystem/nft-metadata-crawler/src/asset_uploader/mod.rs similarity index 96% rename from ecosystem/nft-metadata-crawler-parser/src/asset_uploader.rs rename to ecosystem/nft-metadata-crawler/src/asset_uploader/mod.rs index 70bf41b7b6acb..1ec5bf564a1dd 100644 --- a/ecosystem/nft-metadata-crawler-parser/src/asset_uploader.rs +++ b/ecosystem/nft-metadata-crawler/src/asset_uploader/mod.rs @@ -2,8 +2,9 @@ // SPDX-License-Identifier: Apache-2.0 use crate::{ - config::{AssetUploaderConfig, Server}, - models::nft_metadata_crawler_uris::NFTMetadataCrawlerURIs, + asset_uploader::config::AssetUploaderConfig, + config::Server, + models::parsed_asset_uris::ParsedAssetUris, utils::{ constants::{MAX_ASSET_UPLOAD_RETRY_SECONDS, MAX_RETRY_TIME_SECONDS}, database::upsert_uris, @@ -23,6 +24,8 @@ use std::{sync::Arc, time::Duration}; use tracing::{info, warn}; use url::Url; +pub mod config; + #[derive(Clone)] pub struct AssetUploaderContext { pub asset_uploader_config: Arc, @@ -141,7 +144,7 @@ impl AssetUploaderContext { cdn_uri = cdn_url, "[Asset Uploader] Writing to Postgres" ); - let mut model = NFTMetadataCrawlerURIs::new(url.as_ref()); + let mut model = ParsedAssetUris::new(url.as_ref()); model.set_cdn_image_uri(Some(cdn_url.clone())); let mut conn = self_clone.pool.get().context("Failed to get connection")?; diff --git a/ecosystem/nft-metadata-crawler/src/config.rs b/ecosystem/nft-metadata-crawler/src/config.rs new file mode 100644 index 0000000000000..ab294aafff65b --- /dev/null +++ b/ecosystem/nft-metadata-crawler/src/config.rs @@ -0,0 +1,93 @@ +// Copyright © Aptos Foundation +// SPDX-License-Identifier: Apache-2.0 + +use crate::{ + asset_uploader::{config::AssetUploaderConfig, AssetUploaderContext}, + parser::{config::ParserConfig, ParserContext}, + utils::database::{establish_connection_pool, run_migrations}, +}; +use aptos_indexer_grpc_server_framework::RunnableConfig; +use axum::Router; +use diesel::{ + r2d2::{ConnectionManager, Pool}, + PgConnection, +}; +use enum_dispatch::enum_dispatch; +use serde::{Deserialize, Serialize}; +use tokio::net::TcpListener; +use tracing::info; + +/// Trait for building a router for axum +#[enum_dispatch] +pub trait Server: Send + Sync { + fn build_router(&self) -> Router; +} + +#[derive(Debug, Clone, Serialize, Deserialize)] +#[serde(tag = "type")] +pub enum ServerConfig { + Parser(ParserConfig), + AssetUploader(AssetUploaderConfig), +} + +/// Structs to hold config from YAML +#[derive(Clone, Debug, Deserialize, Serialize)] +#[serde(deny_unknown_fields)] +pub struct NFTMetadataCrawlerConfig { + pub database_url: String, + pub server_port: u16, + pub server_config: ServerConfig, +} + +#[derive(Clone)] +#[enum_dispatch(Server)] +pub enum ServerContext { + Parser(ParserContext), + AssetUploader(AssetUploaderContext), +} + +impl ServerConfig { + pub async fn build_context( + &self, + pool: Pool>, + ) -> ServerContext { + match self { + ServerConfig::Parser(parser_config) => { + ServerContext::Parser(ParserContext::new(parser_config.clone(), pool).await) + }, + ServerConfig::AssetUploader(asset_uploader_config) => ServerContext::AssetUploader( + AssetUploaderContext::new(asset_uploader_config.clone(), pool), + ), + } + } +} + +#[async_trait::async_trait] +impl RunnableConfig for NFTMetadataCrawlerConfig { + /// Main driver function that establishes a connection to Pubsub and parses the Pubsub entries in parallel + async fn run(&self) -> anyhow::Result<()> { + info!("[NFT Metadata Crawler] Starting with config: {:?}", self); + + info!("[NFT Metadata Crawler] Connecting to database"); + let pool = establish_connection_pool(&self.database_url); + info!("[NFT Metadata Crawler] Database connection successful"); + + info!("[NFT Metadata Crawler] Running migrations"); + run_migrations(&pool); + info!("[NFT Metadata Crawler] Finished migrations"); + + // Create request context + let context = self.server_config.build_context(pool).await; + let listener = TcpListener::bind(format!("0.0.0.0:{}", self.server_port)).await?; + axum::serve(listener, context.build_router()).await?; + + Ok(()) + } + + fn get_server_name(&self) -> String { + match &self.server_config { + ServerConfig::Parser(_) => "parser".to_string(), + ServerConfig::AssetUploader(_) => "asset_uploader".to_string(), + } + } +} diff --git a/ecosystem/nft-metadata-crawler-parser/src/lib.rs b/ecosystem/nft-metadata-crawler/src/lib.rs similarity index 98% rename from ecosystem/nft-metadata-crawler-parser/src/lib.rs rename to ecosystem/nft-metadata-crawler/src/lib.rs index c75310b61093e..acabe0aeba90c 100644 --- a/ecosystem/nft-metadata-crawler-parser/src/lib.rs +++ b/ecosystem/nft-metadata-crawler/src/lib.rs @@ -12,7 +12,6 @@ pub mod models; pub mod parser; pub mod schema; pub mod utils; -pub mod worker; /// HEAD request to get MIME type and size of content pub async fn get_uri_metadata(url: &str) -> anyhow::Result<(String, u32)> { diff --git a/ecosystem/nft-metadata-crawler-parser/src/main.rs b/ecosystem/nft-metadata-crawler/src/main.rs similarity index 79% rename from ecosystem/nft-metadata-crawler-parser/src/main.rs rename to ecosystem/nft-metadata-crawler/src/main.rs index 9c050bce8f154..b889dd54638ea 100644 --- a/ecosystem/nft-metadata-crawler-parser/src/main.rs +++ b/ecosystem/nft-metadata-crawler/src/main.rs @@ -2,7 +2,7 @@ // SPDX-License-Identifier: Apache-2.0 use aptos_indexer_grpc_server_framework::ServerArgs; -use aptos_nft_metadata_crawler_parser::config::NFTMetadataCrawlerConfig; +use aptos_nft_metadata_crawler::config::NFTMetadataCrawlerConfig; #[tokio::main] async fn main() -> anyhow::Result<()> { diff --git a/ecosystem/nft-metadata-crawler-parser/src/models/ledger_info.rs b/ecosystem/nft-metadata-crawler/src/models/ledger_info.rs similarity index 100% rename from ecosystem/nft-metadata-crawler-parser/src/models/ledger_info.rs rename to ecosystem/nft-metadata-crawler/src/models/ledger_info.rs diff --git a/ecosystem/nft-metadata-crawler-parser/src/models/mod.rs b/ecosystem/nft-metadata-crawler/src/models/mod.rs similarity index 55% rename from ecosystem/nft-metadata-crawler-parser/src/models/mod.rs rename to ecosystem/nft-metadata-crawler/src/models/mod.rs index a034b1a23f72d..92f8bc964516f 100644 --- a/ecosystem/nft-metadata-crawler-parser/src/models/mod.rs +++ b/ecosystem/nft-metadata-crawler/src/models/mod.rs @@ -2,5 +2,5 @@ // SPDX-License-Identifier: Apache-2.0 pub mod ledger_info; -pub mod nft_metadata_crawler_uris; -pub mod nft_metadata_crawler_uris_query; +pub mod parsed_asset_uris; +pub mod parsed_asset_uris_query; diff --git a/ecosystem/nft-metadata-crawler-parser/src/models/nft_metadata_crawler_uris.rs b/ecosystem/nft-metadata-crawler/src/models/parsed_asset_uris.rs similarity index 95% rename from ecosystem/nft-metadata-crawler-parser/src/models/nft_metadata_crawler_uris.rs rename to ecosystem/nft-metadata-crawler/src/models/parsed_asset_uris.rs index c991362feea80..e3ce82abcfdc6 100644 --- a/ecosystem/nft-metadata-crawler-parser/src/models/nft_metadata_crawler_uris.rs +++ b/ecosystem/nft-metadata-crawler/src/models/parsed_asset_uris.rs @@ -2,7 +2,7 @@ // SPDX-License-Identifier: Apache-2.0 use crate::{ - models::nft_metadata_crawler_uris_query::NFTMetadataCrawlerURIsQuery, + models::parsed_asset_uris_query::ParsedAssetUrisQuery, schema::nft_metadata_crawler::parsed_asset_uris, }; use diesel::prelude::*; @@ -13,7 +13,7 @@ use tracing::warn; #[derive(Clone, Debug, Deserialize, FieldCount, Identifiable, Insertable, Serialize)] #[diesel(primary_key(asset_uri))] #[diesel(table_name = parsed_asset_uris)] -pub struct NFTMetadataCrawlerURIs { +pub struct ParsedAssetUris { asset_uri: String, raw_image_uri: Option, raw_animation_uri: Option, @@ -27,7 +27,7 @@ pub struct NFTMetadataCrawlerURIs { last_transaction_version: i64, } -impl NFTMetadataCrawlerURIs { +impl ParsedAssetUris { pub fn new(asset_uri: &str) -> Self { Self { asset_uri: asset_uri.to_string(), @@ -172,8 +172,8 @@ impl NFTMetadataCrawlerURIs { } } -impl From for NFTMetadataCrawlerURIs { - fn from(query: NFTMetadataCrawlerURIsQuery) -> Self { +impl From for ParsedAssetUris { + fn from(query: ParsedAssetUrisQuery) -> Self { Self { asset_uri: query.asset_uri, raw_image_uri: query.raw_image_uri, diff --git a/ecosystem/nft-metadata-crawler-parser/src/models/nft_metadata_crawler_uris_query.rs b/ecosystem/nft-metadata-crawler/src/models/parsed_asset_uris_query.rs similarity index 92% rename from ecosystem/nft-metadata-crawler-parser/src/models/nft_metadata_crawler_uris_query.rs rename to ecosystem/nft-metadata-crawler/src/models/parsed_asset_uris_query.rs index 65ee71ebe7ad3..409dac08632fb 100644 --- a/ecosystem/nft-metadata-crawler-parser/src/models/nft_metadata_crawler_uris_query.rs +++ b/ecosystem/nft-metadata-crawler/src/models/parsed_asset_uris_query.rs @@ -16,7 +16,7 @@ use tracing::error; #[derive(Debug, Deserialize, Identifiable, Queryable, Serialize)] #[diesel(primary_key(asset_uri))] #[diesel(table_name = parsed_asset_uris)] -pub struct NFTMetadataCrawlerURIsQuery { +pub struct ParsedAssetUrisQuery { pub asset_uri: String, pub raw_image_uri: Option, pub raw_animation_uri: Option, @@ -31,7 +31,7 @@ pub struct NFTMetadataCrawlerURIsQuery { pub last_transaction_version: i64, } -impl NFTMetadataCrawlerURIsQuery { +impl ParsedAssetUrisQuery { pub fn get_by_asset_uri( conn: &mut PooledConnection>, asset_uri: &str, @@ -39,7 +39,7 @@ impl NFTMetadataCrawlerURIsQuery { let mut op = || { parsed_asset_uris::table .find(asset_uri) - .first::(conn) + .first::(conn) .optional() .map_err(Into::into) }; @@ -65,7 +65,7 @@ impl NFTMetadataCrawlerURIsQuery { .filter(parsed_asset_uris::raw_image_uri.eq(raw_image_uri)) .filter(parsed_asset_uris::asset_uri.ne(asset_uri)) .filter(parsed_asset_uris::cdn_image_uri.is_not_null()) - .first::(conn) + .first::(conn) .optional() .map_err(Into::into) }; @@ -91,7 +91,7 @@ impl NFTMetadataCrawlerURIsQuery { .filter(parsed_asset_uris::raw_animation_uri.eq(raw_animation_uri)) .filter(parsed_asset_uris::asset_uri.ne(asset_uri)) .filter(parsed_asset_uris::cdn_animation_uri.is_not_null()) - .first::(conn) + .first::(conn) .optional() .map_err(Into::into) }; @@ -108,7 +108,7 @@ impl NFTMetadataCrawlerURIsQuery { } } -impl Default for NFTMetadataCrawlerURIsQuery { +impl Default for ParsedAssetUrisQuery { fn default() -> Self { Self { asset_uri: "".to_string(), diff --git a/ecosystem/nft-metadata-crawler/src/parser/config.rs b/ecosystem/nft-metadata-crawler/src/parser/config.rs new file mode 100644 index 0000000000000..e918755ab33f4 --- /dev/null +++ b/ecosystem/nft-metadata-crawler/src/parser/config.rs @@ -0,0 +1,48 @@ +// Copyright © Aptos Foundation +// SPDX-License-Identifier: Apache-2.0 + +use crate::utils::constants::{ + DEFAULT_IMAGE_QUALITY, DEFAULT_MAX_FILE_SIZE_BYTES, DEFAULT_MAX_IMAGE_DIMENSIONS, + DEFAULT_MAX_NUM_PARSE_RETRIES, +}; +use serde::{Deserialize, Serialize}; + +#[derive(Clone, Debug, Deserialize, Serialize)] +#[serde(deny_unknown_fields)] +pub struct ParserConfig { + pub google_application_credentials: Option, + pub bucket: String, + pub cdn_prefix: String, + pub ipfs_prefix: String, + pub ipfs_auth_key: Option, + #[serde(default = "ParserConfig::default_max_file_size_bytes")] + pub max_file_size_bytes: u32, + #[serde(default = "ParserConfig::default_image_quality")] + pub image_quality: u8, // Quality up to 100 + #[serde(default = "ParserConfig::default_max_image_dimensions")] + pub max_image_dimensions: u32, + #[serde(default = "ParserConfig::default_max_num_parse_retries")] + pub max_num_parse_retries: i32, + #[serde(default)] + pub ack_parsed_uris: bool, + #[serde(default)] + pub uri_blacklist: Vec, +} + +impl ParserConfig { + pub const fn default_max_file_size_bytes() -> u32 { + DEFAULT_MAX_FILE_SIZE_BYTES + } + + pub const fn default_image_quality() -> u8 { + DEFAULT_IMAGE_QUALITY + } + + pub const fn default_max_image_dimensions() -> u32 { + DEFAULT_MAX_IMAGE_DIMENSIONS + } + + pub const fn default_max_num_parse_retries() -> i32 { + DEFAULT_MAX_NUM_PARSE_RETRIES + } +} diff --git a/ecosystem/nft-metadata-crawler-parser/src/parser.rs b/ecosystem/nft-metadata-crawler/src/parser/mod.rs similarity index 97% rename from ecosystem/nft-metadata-crawler-parser/src/parser.rs rename to ecosystem/nft-metadata-crawler/src/parser/mod.rs index 8997f4dc019b2..56830330f007e 100644 --- a/ecosystem/nft-metadata-crawler-parser/src/parser.rs +++ b/ecosystem/nft-metadata-crawler/src/parser/mod.rs @@ -2,7 +2,7 @@ // SPDX-License-Identifier: Apache-2.0 use crate::{ - config::{ParserConfig, Server}, + config::Server, utils::{ counters::{ GOT_CONNECTION_COUNT, PARSER_FAIL_COUNT, PARSER_INVOCATIONS_COUNT, @@ -10,10 +10,10 @@ use crate::{ }, database::check_or_update_chain_id, }, - worker::Worker, }; use axum::{http::StatusCode, response::Response, routing::post, Router}; use bytes::Bytes; +use config::ParserConfig; use diesel::{ r2d2::{ConnectionManager, Pool}, PgConnection, @@ -21,6 +21,10 @@ use diesel::{ use google_cloud_storage::client::{Client as GCSClient, ClientConfig as GCSClientConfig}; use std::sync::Arc; use tracing::{error, info, warn}; +use worker::Worker; + +pub mod config; +mod worker; /// Struct to hold context required for parsing #[derive(Clone)] @@ -28,14 +32,12 @@ pub struct ParserContext { pub parser_config: Arc, pub pool: Pool>, pub gcs_client: Arc, - pub max_num_retries: i32, } impl ParserContext { pub async fn new( parser_config: ParserConfig, pool: Pool>, - max_num_retries: i32, ) -> Self { if let Some(google_application_credentials) = &parser_config.google_application_credentials { @@ -64,7 +66,6 @@ impl ParserContext { parser_config: Arc::new(parser_config), pool, gcs_client: Arc::new(GCSClient::new(gcs_config)), - max_num_retries, } } @@ -151,7 +152,7 @@ impl ParserContext { let mut worker = Worker::new( self.parser_config.clone(), conn, - self.max_num_retries, + self.parser_config.max_num_parse_retries, self.gcs_client.clone(), &pubsub_message, parts[0], diff --git a/ecosystem/nft-metadata-crawler-parser/src/worker.rs b/ecosystem/nft-metadata-crawler/src/parser/worker.rs similarity index 96% rename from ecosystem/nft-metadata-crawler-parser/src/worker.rs rename to ecosystem/nft-metadata-crawler/src/parser/worker.rs index 85d221fec12f5..dce2f642f48b5 100644 --- a/ecosystem/nft-metadata-crawler-parser/src/worker.rs +++ b/ecosystem/nft-metadata-crawler/src/parser/worker.rs @@ -2,11 +2,8 @@ // SPDX-License-Identifier: Apache-2.0 use crate::{ - config::ParserConfig, - models::{ - nft_metadata_crawler_uris::NFTMetadataCrawlerURIs, - nft_metadata_crawler_uris_query::NFTMetadataCrawlerURIsQuery, - }, + models::{parsed_asset_uris::ParsedAssetUris, parsed_asset_uris_query::ParsedAssetUrisQuery}, + parser::config::ParserConfig, utils::{ counters::{ DUPLICATE_ASSET_URI_COUNT, DUPLICATE_RAW_ANIMATION_URI_COUNT, @@ -38,7 +35,7 @@ pub struct Worker { max_num_retries: i32, gcs_client: Arc, pubsub_message: String, - model: NFTMetadataCrawlerURIs, + model: ParsedAssetUris, asset_data_id: String, asset_uri: String, last_transaction_version: i64, @@ -59,7 +56,7 @@ impl Worker { last_transaction_timestamp: chrono::NaiveDateTime, force: bool, ) -> Self { - let model = NFTMetadataCrawlerURIs::new(asset_uri); + let model = ParsedAssetUris::new(asset_uri); let worker = Self { parser_config, conn, @@ -81,8 +78,7 @@ impl Worker { pub async fn parse(&mut self) -> anyhow::Result<()> { // Deduplicate asset_uri // Exit if not force or if asset_uri has already been parsed - let prev_model = - NFTMetadataCrawlerURIsQuery::get_by_asset_uri(&mut self.conn, &self.asset_uri); + let prev_model = ParsedAssetUrisQuery::get_by_asset_uri(&mut self.conn, &self.asset_uri); if let Some(pm) = prev_model { DUPLICATE_ASSET_URI_COUNT.inc(); self.model = pm.into(); @@ -181,7 +177,7 @@ impl Worker { false } else { self.model.get_raw_image_uri().map_or(true, |uri| { - match NFTMetadataCrawlerURIsQuery::get_by_raw_image_uri( + match ParsedAssetUrisQuery::get_by_raw_image_uri( &mut self.conn, &self.asset_uri, &uri, @@ -288,7 +284,7 @@ impl Worker { None } else { self.model.get_raw_animation_uri().and_then(|uri| { - match NFTMetadataCrawlerURIsQuery::get_by_raw_animation_uri( + match ParsedAssetUrisQuery::get_by_raw_animation_uri( &mut self.conn, &self.asset_uri, &uri, diff --git a/ecosystem/nft-metadata-crawler-parser/src/schema.rs b/ecosystem/nft-metadata-crawler/src/schema.rs similarity index 100% rename from ecosystem/nft-metadata-crawler-parser/src/schema.rs rename to ecosystem/nft-metadata-crawler/src/schema.rs diff --git a/ecosystem/nft-metadata-crawler-parser/src/utils/constants.rs b/ecosystem/nft-metadata-crawler/src/utils/constants.rs similarity index 100% rename from ecosystem/nft-metadata-crawler-parser/src/utils/constants.rs rename to ecosystem/nft-metadata-crawler/src/utils/constants.rs diff --git a/ecosystem/nft-metadata-crawler-parser/src/utils/counters.rs b/ecosystem/nft-metadata-crawler/src/utils/counters.rs similarity index 100% rename from ecosystem/nft-metadata-crawler-parser/src/utils/counters.rs rename to ecosystem/nft-metadata-crawler/src/utils/counters.rs diff --git a/ecosystem/nft-metadata-crawler-parser/src/utils/database.rs b/ecosystem/nft-metadata-crawler/src/utils/database.rs similarity index 96% rename from ecosystem/nft-metadata-crawler-parser/src/utils/database.rs rename to ecosystem/nft-metadata-crawler/src/utils/database.rs index 89202f8ffd783..6763cf3cd3f6d 100644 --- a/ecosystem/nft-metadata-crawler-parser/src/utils/database.rs +++ b/ecosystem/nft-metadata-crawler/src/utils/database.rs @@ -2,7 +2,7 @@ // SPDX-License-Identifier: Apache-2.0 use crate::{ - models::{ledger_info::LedgerInfo, nft_metadata_crawler_uris::NFTMetadataCrawlerURIs}, + models::{ledger_info::LedgerInfo, parsed_asset_uris::ParsedAssetUris}, schema, }; use anyhow::Context; @@ -35,7 +35,7 @@ pub fn run_migrations(pool: &Pool>) { /// Upserts URIs into database pub fn upsert_uris( conn: &mut PooledConnection>, - entry: &NFTMetadataCrawlerURIs, + entry: &ParsedAssetUris, ltv: i64, ) -> anyhow::Result { use schema::nft_metadata_crawler::parsed_asset_uris::dsl::*; diff --git a/ecosystem/nft-metadata-crawler-parser/src/utils/gcs.rs b/ecosystem/nft-metadata-crawler/src/utils/gcs.rs similarity index 100% rename from ecosystem/nft-metadata-crawler-parser/src/utils/gcs.rs rename to ecosystem/nft-metadata-crawler/src/utils/gcs.rs diff --git a/ecosystem/nft-metadata-crawler-parser/src/utils/image_optimizer.rs b/ecosystem/nft-metadata-crawler/src/utils/image_optimizer.rs similarity index 100% rename from ecosystem/nft-metadata-crawler-parser/src/utils/image_optimizer.rs rename to ecosystem/nft-metadata-crawler/src/utils/image_optimizer.rs diff --git a/ecosystem/nft-metadata-crawler-parser/src/utils/json_parser.rs b/ecosystem/nft-metadata-crawler/src/utils/json_parser.rs similarity index 100% rename from ecosystem/nft-metadata-crawler-parser/src/utils/json_parser.rs rename to ecosystem/nft-metadata-crawler/src/utils/json_parser.rs diff --git a/ecosystem/nft-metadata-crawler-parser/src/utils/mod.rs b/ecosystem/nft-metadata-crawler/src/utils/mod.rs similarity index 100% rename from ecosystem/nft-metadata-crawler-parser/src/utils/mod.rs rename to ecosystem/nft-metadata-crawler/src/utils/mod.rs diff --git a/ecosystem/nft-metadata-crawler-parser/src/utils/uri_parser.rs b/ecosystem/nft-metadata-crawler/src/utils/uri_parser.rs similarity index 100% rename from ecosystem/nft-metadata-crawler-parser/src/utils/uri_parser.rs rename to ecosystem/nft-metadata-crawler/src/utils/uri_parser.rs