Merge pull request #21 from Relifest/main
Modifications made in response to suggestions 1 and 3 raised by Mr. Daniel during the TDML meeting
Relifest authored Oct 30, 2024
2 parents 35e5afc + ac21a43 commit e808920
Showing 12 changed files with 124 additions and 54 deletions.
4 changes: 2 additions & 2 deletions .github/workflows/tests.yml
@@ -20,13 +20,13 @@ jobs:
uses: actions/cache@v4
with:
path: ~/.cache/pip
key: ${{ runner.os }}-pip-${{ hashFiles('**/requirements.txt') }}
key: ${{ runner.os }}-pip-${{ hashFiles('**/pyproject.toml') }}

restore-keys: |
${{ runner.os }}-pip-
- name: Install dependencies
run: pip install -r requirements.txt && pip install .[dev]
run: pip install .[dev] && pip install .[all]

- name: Run tests
run: pytest
38 changes: 38 additions & 0 deletions pyproject.toml
@@ -0,0 +1,38 @@
[build-system]
requires = ["setuptools>=42", "wheel"]
build-backend = "setuptools.build_meta"

[project]
name = "pytdml"
dynamic = ["version", "authors", "license", "description", "readme"]
dependencies = [
"geojson~=3.1.0",
"Pillow~=10.4.0",
"PyYAML~=6.0",
"numpy~=1.26.4",
"pandas~=2.2.2",
"opencv-python~=4.10.0.84",
"tensorflow~=2.17.0",
"jsonschema~=4.23.0",
"requests~=2.32.3",
"pydantic~=2.8.2",
"urllib3~=1.25.4",
"rasterio~=1.3.10",
"matplotlib~=3.9.1",
"minio~=7.2.7",
"tqdm~=4.66.4",
"s3fs~=2024.9.0"
]

[project.optional-dependencies]
torch = [
"torch~=2.3.1",
"torchvision~=0.18.1",
"torchdata~=0.7.1"
]

[project.urls]
homepage = "https://github.com/TrainingDML/pytdml"

[tool.setuptools.packages.find]
where = ["."]
14 changes: 7 additions & 7 deletions pytdml/convert_utils.py
@@ -3,7 +3,7 @@
from geojson import Feature

from pytdml.io import write_to_json
from pytdml.type import EOTrainingData, EOTrainingDataset, EOTask, ObjectLabel,PixelLabel
from pytdml.type import AI_EOTrainingData, EOTrainingDataset, AI_EOTask, AI_ObjectLabel, AI_PixelLabel
import os
import re
import time
@@ -102,7 +102,7 @@ def convert_coco_to_tdml(coco_dataset_path, output_json_path):
coord = [[points[0], points[1]], [points[0] + points[2], points[1]], [points[0] + points[2],
points[1] + points[3]],
[points[0], points[1] + points[2]]]
labels = ObjectLabel(is_negative=False, type="AI_ObjectLabel", confidence=1.0, object=Feature(
labels = AI_ObjectLabel(is_negative=False, type="AI_ObjectLabel", confidence=1.0, object=Feature(
id="feature " + str(i), geometry={
"type": "Polygon",
"coordinates": coord
@@ -112,7 +112,7 @@ def convert_coco_to_tdml(coco_dataset_path, output_json_path):
training_type = categorize_string(os.path.basename(os.path.dirname(image_json["coco_url"])))

numbers_of_labels = len(object_labels)
td = EOTrainingData(id=str(image_json["id"]),type="AI_EOTrainingData",data_sources=[""],
td = AI_EOTrainingData(id=str(image_json["id"]),type="AI_EOTrainingData",data_sources=[""],
dataset_id=dataset_id, training_type=training_type,
number_of_labels=numbers_of_labels, labels=object_labels,
date_time=[image_json["date_captured"].replace(' ', 'T')],extent=None, data_URL=[image_json["coco_url"]])
@@ -131,7 +131,7 @@ def convert_coco_to_tdml(coco_dataset_path, output_json_path):
type="AI_EOTrainingDataset",
name=dataset_name,
description=dataset_description,
tasks=[EOTask(task_type="Object Detection",
tasks=[AI_EOTask(task_type="Object Detection",
id=str(dataset_id) + "_task",
dataset_id=str(dataset_id),
type='AI_EOTask',
@@ -202,16 +202,16 @@ def convert_stac_to_tdml(stac_dataset_path, output_json_path):
data_url.append(img_path)
label_url = label_path
image_type = label_type
labels = [PixelLabel(confidence=1.0,type="AI_PixelLabel",image_URL=[label_url],image_format=[image_type])]
labels = [AI_PixelLabel(confidence=1.0,type="AI_PixelLabel",image_URL=[label_url],image_format=[image_type])]
td_list.append(
EOTrainingData(id=item_id,type="AI_EOTrainingData",training_type="Train", dataset_id=dataset_id,number_of_labels=1,labels=labels,extent=item_extent,
AI_EOTrainingData(id=item_id,type="AI_EOTrainingData",training_type="Train", dataset_id=dataset_id,number_of_labels=1,labels=labels,extent=item_extent,
data_URL=data_url))


for class_dict in label_classes:
class_dict['value'] = class_dict.pop('classes')

tasks = [EOTask(task_type=task_name,
tasks = [AI_EOTask(task_type=task_name,
id=str(dataset_id) + "_task",
dataset_id= str(dataset_id),
type='AI_EOTask')]
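
Not part of the commit: a minimal sketch of the renamed classes as the converters above use them. The keyword arguments mirror the AI_PixelLabel and AI_EOTrainingData calls visible in this diff; the ids, URLs, and media type are invented placeholders, and any fields not shown here are assumed to be optional.

from pytdml.type import AI_EOTrainingData, AI_PixelLabel

# Mirrors the AI_PixelLabel(...) call in convert_stac_to_tdml above;
# the label URL and media type are placeholder values.
label = AI_PixelLabel(
    confidence=1.0,
    type="AI_PixelLabel",
    image_URL=["labels/sample_label.tif"],
    image_format=["image/tiff"],
)

# Mirrors the AI_EOTrainingData(...) call above; the id, dataset_id and
# image URL are placeholder values.
sample = AI_EOTrainingData(
    id="0",
    type="AI_EOTrainingData",
    training_type="Train",
    dataset_id="sample_dataset",
    number_of_labels=1,
    labels=[label],
    extent=None,
    data_URL=["images/sample_image.tif"],
)
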
6 changes: 3 additions & 3 deletions pytdml/tdml_image_crop.py
@@ -39,7 +39,7 @@
from geojson import Feature, Polygon

from pytdml.io import read_from_json
from pytdml.type import EOTrainingDataset, EOTrainingData, PixelLabel
from pytdml.type import EOTrainingDataset, AI_EOTrainingData, AI_PixelLabel
from pytdml.utils import remove_empty


@@ -62,9 +62,9 @@ def td_image_crop(td: EOTrainingDataset, save_tdml_path: str, save_crop_dir: str
crop_image_list = image_crop(image_url, image_dir, sub_size)
crop_label_list = image_crop(label_url, label_dir, sub_size)
for crop_image_url, crop_label_url in zip(crop_image_list, crop_label_list):
new_d = EOTrainingData(
new_d = AI_EOTrainingData(
id=str(index),
labels=[PixelLabel(image_url=crop_label_url)],
labels=[AI_PixelLabel(image_url=crop_label_url)],
data_url=crop_image_url,
)
index = index + 1
5 changes: 5 additions & 0 deletions pytdml/type/__init__.py
@@ -41,4 +41,9 @@
from .basic_types import QualityElement
from .basic_types import DataQuality
from .basic_types import TrainingDataset
from .extended_types import AI_PixelLabel
from .extended_types import AI_ObjectLabel
from .extended_types import AI_ObjectLabel
from .extended_types import AI_EOTask
from .extended_types import AI_EOTrainingData
from .extended_types import EOTrainingDataset
4 changes: 2 additions & 2 deletions pytdml/type/basic_types.py
@@ -1289,8 +1289,8 @@ class AI_Label(BaseCamelModel):

type: Literal["AI_AbstractLabel"]

is_negative: Optional[bool] = False # Optional without default value
confidence: Optional[float] = Field(1.0, ge=0.0, le=1.0)
is_negative: Optional[bool] = None
confidence: Optional[float] = Field(None, ge=0.0, le=1.0)

def to_dict(self):
return self.model_dump(by_alias=True, exclude_none=True)
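
Not part of the commit: the practical effect of the default change above, shown with a small stand-in Pydantic model rather than the real AI_Label class (which also requires its type literal). With None defaults, fields the user never sets are dropped by an exclude_none dump instead of being written out as is_negative=false and confidence=1.0.

from typing import Optional
from pydantic import BaseModel, Field

class LabelSketch(BaseModel):
    # Stand-in for the two AI_Label fields changed above; not the real class.
    is_negative: Optional[bool] = None
    confidence: Optional[float] = Field(None, ge=0.0, le=1.0)

# Unset fields now disappear from the output, matching the
# exclude_none=True behaviour of AI_Label.to_dict().
print(LabelSketch().model_dump(exclude_none=True))                # {}
print(LabelSketch(confidence=0.9).model_dump(exclude_none=True))  # {'confidence': 0.9}
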
10 changes: 9 additions & 1 deletion pytdml/type/extended_types.py
@@ -195,7 +195,7 @@ class EOTrainingDataset(TrainingDataset):
# For convenience, we allow the user to specify the bands by name

bands: Optional[List[MD_Band]] = None
extent: Optional[EX_Extent] = None
extent: Optional[Union[EX_Extent, List[Union[int, float]]]] = None
image_size: Optional[str] = None

def to_dict(self):
@@ -204,4 +204,12 @@ def to_dict(self):
@staticmethod
def from_dict(json_dict):
new_dict = copy.deepcopy(json_dict)
if new_dict.__contains__('extent'):
extent = new_dict['extent']
for i in range(len(extent)):
if EX_Extent.can_build_from_data(extent[i]):
extent[i] = EX_Extent.from_dict(extent[i])
else:
continue
new_dict['extent'] = extent
return EOTrainingDataset(**new_dict)
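
Not part of the commit: an isolated sketch of the normalization pattern the new from_dict code applies, where each entry of a mixed extent list is converted only if it can be built into an EX_Extent, and plain numeric bounds pass through untouched. looks_like_ex_extent and parse_ex_extent stand in for EX_Extent.can_build_from_data and EX_Extent.from_dict; the sample values are invented.

def looks_like_ex_extent(entry):
    # Stand-in for EX_Extent.can_build_from_data(entry): only mappings are
    # candidates for conversion.
    return isinstance(entry, dict)

def parse_ex_extent(entry):
    # Stand-in for EX_Extent.from_dict(entry); here we simply tag the mapping.
    return {"parsed": True, **entry}

# A mixed extent: plain bounding-box numbers plus one EX_Extent-style mapping.
extent = [114.0, 30.0, {"description": "study area"}, 115.0, 31.0]
for i in range(len(extent)):
    if looks_like_ex_extent(extent[i]):
        extent[i] = parse_ex_extent(extent[i])

# Numeric bounds are untouched; only the mapping was converted.
assert extent[0] == 114.0 and extent[2]["parsed"] is True
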
20 changes: 0 additions & 20 deletions requirements.txt

This file was deleted.

28 changes: 22 additions & 6 deletions setup.py
@@ -54,19 +54,36 @@ def read(filename, encoding='utf-8'):
break


# get dependencies
with open("requirements.txt") as f:
install_requires = [line for line in f if line and line[0] not in "#-"]
# use README.md for project long_description
long_description = read('README.md')

from setuptools import find_packages, setup
import io
import os.path

def read(filename, encoding='utf-8'):
"""read file contents"""
full_path = os.path.join(os.path.dirname(__file__), filename)
with io.open(full_path, encoding=encoding) as fh:
contents = fh.read().strip()
return contents

# get version number
with open(os.path.join(os.path.dirname(__file__), 'pytdml/__init__.py')) as f:
for line in f:
if line.find("__version__") >= 0:
version = line.split("=")[1].strip()
version = version.strip('"')
version = version.strip("'")
break

# use README.md for project long_description
long_description = read('README.md')

setup(
name='pytdml',
version=version,
description='Parsing and encoding training datasets based on OGC Training Data Markup Language for AI ('
'TrainingDML-AI) standard',
description='Parsing and encoding training datasets based on OGC Training Data Markup Language for AI (TrainingDML-AI) standard',
long_description=long_description,
long_description_content_type="text/markdown",
author='Boyi Shangguan',
@@ -75,7 +92,6 @@ def read(filename, encoding='utf-8'):
license='MIT',
packages=find_packages(),
include_package_data=True,
install_requires=install_requires,
classifiers=[
'Development Status :: 3 - Alpha',
'Intended Audience :: Developers',
35 changes: 29 additions & 6 deletions tests/data/WHU-building.json
@@ -18,14 +18,33 @@
],
"classes": [
{
"building": 1
"key": "building",
"value": 1
}
],
"numberOfClasses": 1,
"bands": [
"red",
"green",
"blue"
{
"name": [
{
"code": "red"
}
]
},
{
"name": [
{
"code": "green"
}
]
},
{
"name": [
{
"code": "blue"
}
]
}
],
"imageSize": "32507x15354",
"tasks": [
@@ -58,8 +77,12 @@
"labels": [
{
"type": "AI_PixelLabel",
"imageURL": ["change label/change_label.tif"],
"imageFormat": ["TIFF"]
"imageURL": [
"change_label/change_label.tif"
],
"imageFormat": [
"image/tiff"
]
}
]
}
12 changes: 6 additions & 6 deletions tests/test_basic_types.py
@@ -1,17 +1,17 @@
import pytest
from pydantic import ValidationError, BaseModel, validator
from pydantic import ValidationError, BaseModel, validator, field_validator
import jsonschema
import requests

from pytdml.type.basic_types import _validate_date, to_camel,Labeler
from pytdml.type.basic_types import _validate_date, to_camel, AI_Labeler

base_url = "https://raw.githubusercontent.com/opengeospatial/TrainingDML-AI_SWG/main/schemas/1.0/json_schema/{}.json"


class test_date_model(BaseModel):
date: str

@validator("date")
@field_validator("date")
def validate_date(cls, v):
return _validate_date(v)

@@ -120,7 +120,7 @@ def test_required_elements_with_Labeler():
"name": "zhaoyan"
}
with pytest.raises( ValidationError):
Labeler(**data)
AI_Labeler.from_dict(data)

# Test valid Labeler and with remote schema
def test_valid_Labeler_schema():
@@ -129,10 +129,10 @@ def test_valid_Labeler_schema():
"id": "1",
"name": "zhaoyan"
}
labeler = Labeler(**data)
labeler = AI_Labeler.from_dict(data)

remote_schema_url = base_url.format("ai_labeler")
response = requests.get(remote_schema_url)
remote_schema = response.json()

jsonschema.validate(instance=labeler.dict(), schema=remote_schema)
jsonschema.validate(instance=labeler.to_dict(), schema=remote_schema)
2 changes: 1 addition & 1 deletion tests/test_tdml_io.py
@@ -12,4 +12,4 @@ def test_read_and_write():
td = read_from_json(tdml_path)
with open(tdml_path, 'r') as f:
data = json.load(f)
assert td.dict(by_alias=True,exclude_none=True) == data
assert td.to_dict() == data
