Skip to content
Merged
Show file tree
Hide file tree
Changes from 5 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -1,8 +1,10 @@
"""Serializers."""
import abc
from datetime import datetime
from typing import TypedDict

import attr
from stac_pydantic.shared import DATETIME_RFC339

from stac_fastapi.types import stac as stac_types
from stac_fastapi.types.links import CollectionLinks, ItemLinks, resolve_links
Expand All @@ -22,6 +24,30 @@ def db_to_stac(cls, item: dict, base_url: str) -> TypedDict:
class ItemSerializer(Serializer):
"""Serialization methods for STAC items."""

@classmethod
def stac_to_db(cls, stac_data: TypedDict, base_url: str) -> stac_types.Item:
"""Transform stac item to database ready stac item."""
item_links = ItemLinks(
collection_id=stac_data["collection"],
item_id=stac_data["id"],
base_url=base_url,
).create_links()
stac_data["links"] = item_links

# elasticsearch doesn't like the fact that some values are float and some were int
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm going to fix this in #31 -- I'm about to merge the stac-server issue about the same thing, so it should be straightforward to port it.

if "eo:bands" in stac_data["properties"]:
for wave in stac_data["properties"]["eo:bands"]:
for k, v in wave.items():
if type(v) != str:
v = float(v)
wave.update({k: v})

now = datetime.utcnow().strftime(DATETIME_RFC339)
if "created" not in stac_data["properties"]:
stac_data["properties"]["created"] = str(now)
stac_data["properties"]["updated"] = str(now)
return stac_data

@classmethod
def db_to_stac(cls, item: dict, base_url: str) -> stac_types.Item:
"""Transform database model to stac item."""
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@
from stac_fastapi.types import stac as stac_types
from stac_fastapi.types.core import BaseTransactionsClient
from stac_fastapi.types.errors import ConflictError, ForeignKeyError, NotFoundError
from stac_fastapi.types.links import CollectionLinks, ItemLinks
from stac_fastapi.types.links import CollectionLinks

logger = logging.getLogger(__name__)

Expand Down Expand Up @@ -54,18 +54,8 @@ def _create_item_index(self):
def create_item(self, model: stac_types.Item, **kwargs):
"""Create item."""
base_url = str(kwargs["request"].base_url)
item_links = ItemLinks(
collection_id=model["collection"], item_id=model["id"], base_url=base_url
).create_links()
model["links"] = item_links

# elasticsearch doesn't like the fact that some values are float and some were int
if "eo:bands" in model["properties"]:
for wave in model["properties"]["eo:bands"]:
for k, v in wave.items():
if type(v) != str:
v = float(v)
wave.update({k: v})
self._create_item_index()

if not self.client.exists(index="stac_collections", id=model["collection"]):
raise ForeignKeyError(f"Collection {model['collection']} does not exist")
Expand All @@ -75,14 +65,10 @@ def create_item(self, model: stac_types.Item, **kwargs):
f"Item {model['id']} in collection {model['collection']} already exists"
)

now = datetime.utcnow().strftime(DATETIME_RFC339)
if "created" not in model["properties"]:
model["properties"]["created"] = str(now)

self._create_item_index()
data = ItemSerializer.stac_to_db(model, base_url)

self.client.index(
index="stac_items", doc_type="_doc", id=model["id"], document=model
index="stac_items", doc_type="_doc", id=model["id"], document=data
)
return ItemSerializer.db_to_stac(model, base_url)

Expand Down Expand Up @@ -160,33 +146,8 @@ def __attrs_post_init__(self):
settings = ElasticsearchSettings()
self.client = settings.create_client

def _create_item_index(self):
mapping = {
"mappings": {
"properties": {
"geometry": {"type": "geo_shape"},
"id": {"type": "text", "fields": {"keyword": {"type": "keyword"}}},
"properties__datetime": {
"type": "text",
"fields": {"keyword": {"type": "keyword"}},
},
}
}
}

_ = self.client.indices.create(
index="stac_items",
body=mapping,
ignore=400, # ignore 400 already exists code
)

def _preprocess_item(self, model: stac_types.Item, base_url) -> stac_types.Item:
"""Preprocess items to match data model."""
item_links = ItemLinks(
collection_id=model["collection"], item_id=model["id"], base_url=base_url
).create_links()
model["links"] = item_links

if not self.client.exists(index="stac_collections", id=model["collection"]):
raise ForeignKeyError(f"Collection {model['collection']} does not exist")

Expand All @@ -195,22 +156,13 @@ def _preprocess_item(self, model: stac_types.Item, base_url) -> stac_types.Item:
f"Item {model['id']} in collection {model['collection']} already exists"
)

now = datetime.utcnow().strftime(DATETIME_RFC339)
if "created" not in model["properties"]:
model["properties"]["created"] = str(now)

# elasticsearch doesn't like the fact that some values are float and some were int
if "eo:bands" in model["properties"]:
for wave in model["properties"]["eo:bands"]:
for k, v in wave.items():
if type(v) != str:
v = float(v)
wave.update({k: v})
return model
item = ItemSerializer.stac_to_db(model, base_url)
return item

def bulk_item_insert(self, items: Items, **kwargs) -> str:
"""Bulk item insertion using es."""
self._create_item_index()
transactions_client = TransactionsClient()
transactions_client._create_item_index()
try:
base_url = str(kwargs["request"].base_url)
except Exception:
Expand All @@ -220,14 +172,6 @@ def bulk_item_insert(self, items: Items, **kwargs) -> str:
]
return_msg = f"Successfully added {len(processed_items)} items."

# helpers.bulk(
# self.client,
# processed_items,
# index="stac_items",
# doc_type="_doc",
# request_timeout=200,
# )

def bulk_sync(processed_items):
actions = [
{"_index": "stac_items", "_source": item} for item in processed_items
Expand Down