From 6010384dc237c77f317fda96bb9e7e5bed9f4418 Mon Sep 17 00:00:00 2001
From: tianweidut <liutianweidlut@gmail.com>
Date: Sat, 8 Oct 2022 10:47:23 +0800
Subject: [PATCH] add python sdk doc

---
 client/starwhale/__init__.py                  |   7 +-
 client/tests/sdk/test_loader.py               |   9 +-
 client/tests/sdk/test_model.py                |   3 +-
 docs/docs/reference/sdk/data_type.md          |   3 +
 docs/docs/reference/sdk/dataset.md            |   3 +
 docs/docs/reference/sdk/evaluation.md         |   3 +
 docs/docs/reference/sdk/other.md              |   3 +
 docs/docs/reference/sdk/overview.md           |   3 +
 .../current/reference/cli/model.md            |   2 +-
 .../current/reference/sdk/data_type.md        | 588 ++++++++++++++++++
 .../current/reference/sdk/dataset.md          | 165 +++++
 .../current/reference/sdk/evaluation.md       | 195 ++++++
 .../current/reference/sdk/other.md            |  53 ++
 .../current/reference/sdk/overview.md         |  43 ++
 docs/sidebars.js                              |   3 +
 15 files changed, 1070 insertions(+), 13 deletions(-)
 create mode 100644 docs/docs/reference/sdk/data_type.md
 create mode 100644 docs/docs/reference/sdk/other.md
 create mode 100644 docs/docs/reference/sdk/overview.md
 create mode 100644 docs/i18n/zh/docusaurus-plugin-content-docs/current/reference/sdk/data_type.md
 create mode 100644 docs/i18n/zh/docusaurus-plugin-content-docs/current/reference/sdk/other.md
 create mode 100644 docs/i18n/zh/docusaurus-plugin-content-docs/current/reference/sdk/overview.md

diff --git a/client/starwhale/__init__.py b/client/starwhale/__init__.py
index 5fd75ebbac..bc9e8def83 100644
--- a/client/starwhale/__init__.py
+++ b/client/starwhale/__init__.py
@@ -1,5 +1,6 @@
 from starwhale.api.job import step, Context
 from starwhale.version import STARWHALE_VERSION as __version__
+from starwhale.base.uri import URI, URIType
 from starwhale.api.model import PipelineHandler, PPLResultIterator
 from starwhale.api.metric import multi_classification
 from starwhale.api.dataset import (
@@ -18,8 +19,6 @@
     get_data_loader,
     LocalFSLinkAuth,
     DefaultS3LinkAuth,
-    SWDSBinDataLoader,
-    UserRawDataLoader,
     COCOObjectAnnotation,
     SWDSBinBuildExecutor,
     UserRawBuildExecutor,
@@ -29,6 +28,8 @@
     "__version__",
     "PipelineHandler",
     "multi_classification",
+    "URI",
+    "URIType",
     "step",
     "Context",
     "get_data_loader",
@@ -41,8 +42,6 @@
     "BuildExecutor",  # SWDSBinBuildExecutor alias
     "UserRawBuildExecutor",
     "SWDSBinBuildExecutor",
-    "SWDSBinDataLoader",
-    "UserRawDataLoader",
     "Binary",
     "Text",
     "Audio",
diff --git a/client/tests/sdk/test_loader.py b/client/tests/sdk/test_loader.py
index 331d2160eb..f155a8bba5 100644
--- a/client/tests/sdk/test_loader.py
+++ b/client/tests/sdk/test_loader.py
@@ -4,13 +4,7 @@
 
 from pyfakefs.fake_filesystem_unittest import TestCase
 
-from starwhale import (
-    MIMEType,
-    S3LinkAuth,
-    get_data_loader,
-    SWDSBinDataLoader,
-    UserRawDataLoader,
-)
+from starwhale import MIMEType, S3LinkAuth, get_data_loader
 from starwhale.consts import AUTH_ENV_FNAME, SWDSBackendType
 from starwhale.base.uri import URI
 from starwhale.utils.fs import ensure_dir, ensure_file
@@ -22,6 +16,7 @@
     LocalFSStorageBackend,
 )
 from starwhale.core.dataset.tabular import TabularDatasetRow
+from starwhale.api._impl.dataset.loader import SWDSBinDataLoader, UserRawDataLoader
 
 from .. import ROOT_DIR
 
diff --git a/client/tests/sdk/test_model.py b/client/tests/sdk/test_model.py
index f2058bc88c..26db9f30ab 100644
--- a/client/tests/sdk/test_model.py
+++ b/client/tests/sdk/test_model.py
@@ -7,7 +7,7 @@
 
 from pyfakefs.fake_filesystem_unittest import TestCase
 
-from starwhale import Context, get_data_loader, PipelineHandler, UserRawDataLoader
+from starwhale import Context, get_data_loader, PipelineHandler
 from starwhale.consts import DEFAULT_PROJECT
 from starwhale.base.uri import URI
 from starwhale.utils.fs import ensure_dir
@@ -22,6 +22,7 @@
 from starwhale.core.dataset.type import MIMEType, ArtifactType, DatasetSummary
 from starwhale.core.dataset.store import DatasetStorage
 from starwhale.core.dataset.tabular import TabularDatasetRow
+from starwhale.api._impl.dataset.loader import UserRawDataLoader
 
 from .. import ROOT_DIR
 
diff --git a/docs/docs/reference/sdk/data_type.md b/docs/docs/reference/sdk/data_type.md
new file mode 100644
index 0000000000..20214061bf
--- /dev/null
+++ b/docs/docs/reference/sdk/data_type.md
@@ -0,0 +1,3 @@
+---
+title: Data Type
+---
diff --git a/docs/docs/reference/sdk/dataset.md b/docs/docs/reference/sdk/dataset.md
index e69de29bb2..8217dd99f2 100644
--- a/docs/docs/reference/sdk/dataset.md
+++ b/docs/docs/reference/sdk/dataset.md
@@ -0,0 +1,3 @@
+---
+title: Dataset Builder and Loader
+---
diff --git a/docs/docs/reference/sdk/evaluation.md b/docs/docs/reference/sdk/evaluation.md
index e69de29bb2..aaaf66fc2f 100644
--- a/docs/docs/reference/sdk/evaluation.md
+++ b/docs/docs/reference/sdk/evaluation.md
@@ -0,0 +1,3 @@
+---
+title: Model Evaluation
+---
diff --git a/docs/docs/reference/sdk/other.md b/docs/docs/reference/sdk/other.md
new file mode 100644
index 0000000000..a11bb625dc
--- /dev/null
+++ b/docs/docs/reference/sdk/other.md
@@ -0,0 +1,3 @@
+---
+title: Other
+---
diff --git a/docs/docs/reference/sdk/overview.md b/docs/docs/reference/sdk/overview.md
new file mode 100644
index 0000000000..43b4d1a971
--- /dev/null
+++ b/docs/docs/reference/sdk/overview.md
@@ -0,0 +1,3 @@
+---
+title: Python SDK
+---
diff --git a/docs/i18n/zh/docusaurus-plugin-content-docs/current/reference/cli/model.md b/docs/i18n/zh/docusaurus-plugin-content-docs/current/reference/cli/model.md
index 5b4f4d8f78..2a1b7ddc00 100644
--- a/docs/i18n/zh/docusaurus-plugin-content-docs/current/reference/cli/model.md
+++ b/docs/i18n/zh/docusaurus-plugin-content-docs/current/reference/cli/model.md
@@ -14,7 +14,7 @@ model命令提供适用于Standalone Instance和Cloud Instance的Starwhale Model
 
 model包含如下子命令：
 
-|Command|Standalone|Cloud|
+|命令|Standalone|Cloud|
 |-------|----------|-----|
 |`build`|✅|❌|
 |`copy`|✅|✅|
diff --git a/docs/i18n/zh/docusaurus-plugin-content-docs/current/reference/sdk/data_type.md b/docs/i18n/zh/docusaurus-plugin-content-docs/current/reference/sdk/data_type.md
new file mode 100644
index 0000000000..319465d156
--- /dev/null
+++ b/docs/i18n/zh/docusaurus-plugin-content-docs/current/reference/sdk/data_type.md
@@ -0,0 +1,588 @@
+---
+title: 数据类型
+---
+
+## starwhale.COCOObjectAnnotation
+
+提供COCO类型的定义，Github上的[代码链接](https://github.com/star-whale/starwhale/blob/dc6e6fdeae2f7c5bd0e72ccd8fb50768b1ce0826/client/starwhale/core/dataset/type.py#L403)。
+
+```python
+COCOObjectAnnotation(
+    id: int,
+    image_id: int,
+    category_id: int,
+    segmentation: Union[t.List, t.Dict],
+    area: Union[float, int],
+    bbox: Union[BoundingBox, t.List[float]],
+    iscrowd: int,
+)
+```
+
+|参数|说明|
+|---|---|
+|`id`|object id，一般为全局object的递增id|
+|`image_id`|image id，一般为图片id|
+|`category_id`|category id，一般为目标检测中类别的id|
+|`segmentation`|物体轮廓表示，Polygon(多边形的点)或RLE格式|
+|`area`|object面积|
+|`bbox`|表示bounding box，可以为BoundingBox类型或float的列表|
+|`iscrowd`|0表示是一个单独的object，1表示两个没有分开的object|
+
+### 使用示例
+
+```python
+def _make_coco_annotations(
+    self, mask_fpath: Path, image_id: int
+) -> t.List[COCOObjectAnnotation]:
+    mask_img = PILImage.open(str(mask_fpath))
+
+    mask = np.array(mask_img)
+    object_ids = np.unique(mask)[1:]
+    binary_mask = mask == object_ids[:, None, None]
+    # TODO: tune permute without pytorch
+    binary_mask_tensor = torch.as_tensor(binary_mask, dtype=torch.uint8)
+    binary_mask_tensor = (
+        binary_mask_tensor.permute(0, 2, 1).contiguous().permute(0, 2, 1)
+    )
+
+    coco_annotations = []
+    for i in range(0, len(object_ids)):
+        _pos = np.where(binary_mask[i])
+        _xmin, _ymin = float(np.min(_pos[1])), float(np.min(_pos[0]))
+        _xmax, _ymax = float(np.max(_pos[1])), float(np.max(_pos[0]))
+        _bbox = BoundingBox(
+            x=_xmin, y=_ymin, width=_xmax - _xmin, height=_ymax - _ymin
+        )
+
+        rle: t.Dict = coco_mask.encode(binary_mask_tensor[i].numpy())  # type: ignore
+        rle["counts"] = rle["counts"].decode("utf-8")
+
+        coco_annotations.append(
+            COCOObjectAnnotation(
+                id=self.object_id,
+                image_id=image_id,
+                category_id=1,  # PennFudan Dataset only has one class-PASPersonStanding
+                segmentation=rle,
+                area=_bbox.width * _bbox.height,
+                bbox=_bbox,
+                iscrowd=0,  # suppose all instances are not crowd
+            )
+        )
+        self.object_id += 1
+
+    return coco_annotations
+```
+
+## starwhale.GrayscaleImage
+
+提供灰度图类型，比如MNIST中数字手写体图片，是 `Image` 类型的一个特例。Github上的[代码链接](https://github.com/star-whale/starwhale/blob/dc6e6fdeae2f7c5bd0e72ccd8fb50768b1ce0826/client/starwhale/core/dataset/type.py#L301)。
+
+```python
+GrayscaleImage(
+    fp: _TArtifactFP = "",
+    display_name: str = "",
+    shape: Optional[_TShape] = None,
+    as_mask: bool = False,
+    mask_uri: str = "",
+)
+```
+
+|参数|说明|
+|---|---|
+|`fp`|图片的路径、IO对象或文件内容的bytes|
+|`display_name`|Dataset Viewer上展示的名字|
+|`shape`|图片的Width和Height，channel默认为1|
+|`as_mask`|是否作为Mask图片|
+|`mask_uri`|Mask原图的URI|
+
+### 使用示例
+
+```python
+for i in range(0, min(data_number, label_number)):
+    _data = data_file.read(image_size)
+    _label = struct.unpack(">B", label_file.read(1))[0]
+    yield GrayscaleImage(
+        _data,
+        display_name=f"{i}",
+        shape=(height, width, 1),
+    ), {"label": _label}
+```
+
+### 函数
+
+#### to_bytes
+
+```python
+to_bytes(encoding: str= "utf-8") -> bytes
+```
+
+#### carry_raw_data
+
+```python
+carry_raw_data() -> GrayscaleImage
+```
+
+#### astype
+
+```python
+astype() -> Dict[str, t.Any]
+```
+
+## starwhale.BoundingBox
+
+提供边界框类型，目前为 `LTWH` 格式，即 `left_x`, `top_y`, `width` 和 `height`。Github上的[代码链接](https://github.com/star-whale/starwhale/blob/dc6e6fdeae2f7c5bd0e72ccd8fb50768b1ce0826/client/starwhale/core/dataset/type.py#L363)。
+
+```python
+BoundingBox(
+    x: float,
+    y: float,
+    width: float,
+    height: float
+)
+```
+
+|参数|说明|
+|---|---|
+|`x`|left_x的坐标|
+|`y`|top_y的坐标|
+|`width`|边界框的宽度|
+|`height`|边界框的高度|
+
+## starwhale.ClassLabel
+
+描述label的数量和类型，Github上的[代码链接](https://github.com/star-whale/starwhale/blob/dc6e6fdeae2f7c5bd0e72ccd8fb50768b1ce0826/client/starwhale/core/dataset/type.py#L344)。
+
+```python
+ClassLabel(
+     names: List[Union[int, float, str]]
+)
+```
+
+## starwhale.Image
+
+图片类型，Github上的[代码链接](https://github.com/star-whale/starwhale/blob/dc6e6fdeae2f7c5bd0e72ccd8fb50768b1ce0826/client/starwhale/core/dataset/type.py#L267)。
+
+```python
+Image(
+    fp: _TArtifactFP = "",
+    display_name: str = "",
+    shape: Optional[_TShape] = None,
+    mime_type: Optional[MIMEType] = None,
+    as_mask: bool = False,
+    mask_uri: str = "",
+)
+```
+
+|参数|说明|
+|---|---|
+|`fp`|图片的路径、IO对象或文件内容的bytes|
+|`display_name`|Dataset Viewer上展示的名字|
+|`shape`|图片的Width、Height和channel|
+|`mime_type`|MIMEType支持的类型|
+|`as_mask`|是否作为Mask图片|
+|`mask_uri`|Mask原图的URI|
+
+### 使用示例
+
+```python
+import io
+import typing as t
+import pickle
+from PIL import Image as PILImage
+from starwhale import Image, MIMEType
+
+def _iter_item(paths: t.List[Path]) -> t.Generator[t.Tuple[t.Any, t.Dict], None, None]:
+    for path in paths:
+        with path.open("rb") as f:
+            content = pickle.load(f, encoding="bytes")
+            for data, label, filename in zip(
+                content[b"data"], content[b"labels"], content[b"filenames"]
+            ):
+                annotations = {
+                    "label": label,
+                    "label_display_name": dataset_meta["label_names"][label],
+                }
+
+                image_array = data.reshape(3, 32, 32).transpose(1, 2, 0)
+                image_bytes = io.BytesIO()
+                PILImage.fromarray(image_array).save(image_bytes, format="PNG")
+
+                yield Image(
+                    fp=image_bytes.getvalue(),
+                    display_name=filename.decode(),
+                    shape=image_array.shape,
+                    mime_type=MIMEType.PNG,
+                ), annotations
+
+
+```
+
+### 函数
+
+#### to_bytes
+
+```python
+to_bytes(encoding: str= "utf-8") -> bytes
+```
+
+#### carry_raw_data
+
+```python
+carry_raw_data() -> Image
+```
+
+#### astype
+
+```python
+astype() -> Dict[str, t.Any]
+```
+
+## starwhale.Audio
+
+音频类型，Github上的[代码链接](https://github.com/star-whale/starwhale/blob/dc6e6fdeae2f7c5bd0e72ccd8fb50768b1ce0826/client/starwhale/core/dataset/type.py#L324)。
+
+```python
+Audio(
+    fp: _TArtifactFP = "",
+    display_name: str = "",
+    shape: Optional[_TShape] = None,
+    mime_type: Optional[MIMEType] = None,
+)
+```
+
+|参数|说明|
+|---|---|
+|`fp`|音频的路径、IO对象或文件内容的bytes|
+|`display_name`|Dataset Viewer上展示的名字|
+|`shape`|音频数据的shape|
+|`mime_type`|MIMEType支持的类型|
+
+### 使用示例
+
+```python
+import typing as t
+from starwhale import Audio, MIMEType
+
+def iter_item() -> t.Generator[t.Tuple[t.Any, t.Any], None, None]:
+    for path in validation_ds_paths:
+        with path.open() as f:
+            for item in f.readlines():
+                item = item.strip()
+                if not item:
+                    continue
+
+                data_path = dataset_dir / item
+                data = Audio(
+                    data_path, display_name=item, shape=(1,), mime_type=MIMEType.WAV
+                )
+
+                speaker_id, utterance_num = data_path.stem.split("_nohash_")
+                annotations = {
+                    "label": data_path.parent.name,
+                    "speaker_id": speaker_id,
+                    "utterance_num": int(utterance_num),
+                }
+                yield data, annotations
+```
+
+### 函数
+
+#### to_bytes
+
+```python
+to_bytes(encoding: str= "utf-8") -> bytes
+```
+
+#### carry_raw_data
+
+```python
+carry_raw_data() -> Audio
+```
+
+#### astype
+
+```python
+astype() -> Dict[str, t.Any]
+```
+
+## starwhale.Text
+
+文本类型，默认为 `utf-8` 格式。Github上的[代码链接](https://github.com/star-whale/starwhale/blob/dc6e6fdeae2f7c5bd0e72ccd8fb50768b1ce0826/client/starwhale/core/dataset/type.py#L380)。
+
+```python
+Text(
+    content: str,
+    encoding: str = "utf-8",
+)
+```
+
+|参数|说明|
+|---|---|
+|`content`|text内容|
+|`encoding`|text的编码格式|
+
+### 使用示例
+
+```python
+import typing as t
+from pathlib import Path
+from starwhale import Text
+
+def iter_item(self) -> t.Generator[t.Tuple[t.Any, t.Any], None, None]:
+    root_dir = Path(__file__).parent.parent / "data"
+
+    with (root_dir / "fra-test.txt").open("r") as f:
+        for line in f.readlines():
+            line = line.strip()
+            if not line or line.startswith("CC-BY"):
+                continue
+
+            _data, _label, *_ = line.split("\t")
+            data = Text(_data, encoding="utf-8")
+            annotations = {"label": _label}
+            yield data, annotations
+```
+
+### 函数
+
+#### to_bytes
+
+```python
+to_bytes(encoding: str= "utf-8") -> bytes
+```
+
+#### carry_raw_data
+
+```python
+carry_raw_data() -> Text
+```
+
+#### astype
+
+```python
+astype() -> Dict[str, t.Any]
+```
+
+#### to_str
+
+```python
+to_str() -> str
+```
+
+## starwhale.Binary
+
+二进制类型，用bytes存储，Github上的[代码链接](https://github.com/star-whale/starwhale/blob/dc6e6fdeae2f7c5bd0e72ccd8fb50768b1ce0826/client/starwhale/core/dataset/type.py#L258)。
+
+```python
+Binary(
+    fp: _TArtifactFP = "",
+    mime_type: MIMEType = MIMEType.UNDEFINED,
+)
+```
+
+|参数|说明|
+|---|---|
+|`fp`|路径、IO对象或文件内容的bytes|
+|`mime_type`|MIMEType支持的类型|
+
+### 函数
+
+#### to_bytes
+
+```python
+to_bytes(encoding: str= "utf-8") -> bytes
+```
+
+#### carry_raw_data
+
+```python
+carry_raw_data() -> Binary
+```
+
+#### astype
+
+```python
+astype() -> Dict[str, t.Any]
+```
+
+## starwhale.Link
+Link类型，用来制作 `remote-link` 和 `user-raw` 类型的数据集。Github上的[代码链接](https://github.com/star-whale/starwhale/blob/dc6e6fdeae2f7c5bd0e72ccd8fb50768b1ce0826/client/starwhale/core/dataset/type.py#L432)。
+
+```python
+Link(
+    uri: str,
+    auth: Optional[LinkAuth] = DefaultS3LinkAuth,
+    offset: int = 0,
+    size: int = -1,
+    data_type: Optional[BaseArtifact] = None,
+    with_local_fs_data: bool = False,
+)
+```
+
+|参数|说明|
+|---|---|
+|`uri`|原始数据的uri地址，目前支持localFS和S3两种协议|
+|`auth`|Link Auth信息|
+|`offset`|数据相对uri指向的文件偏移量|
+|`size`|数据大小|
+|`data_type`|Link指向的实际数据类型，目前支持 `Binary`, `Image`, `Text`, `Audio` 四种类型|
+|`with_local_fs_data`|是否包含本地文件系统中的数据，用于表示user-raw格式的数据|
+
+### 使用示例
+
+```python
+import typing as t
+import struct
+from pathlib import Path
+
+from starwhale import Link, GrayscaleImage
+
+def iter_item() -> t.Generator[t.Tuple[t.Any, t.Any], None, None]:
+    root_dir = Path(__file__).parent.parent / "data"
+    data_fpath = root_dir / "t10k-images-idx3-ubyte"
+    label_fpath = root_dir / "t10k-labels-idx1-ubyte"
+
+    with data_fpath.open("rb") as data_file, label_fpath.open("rb") as label_file:
+        _, data_number, height, width = struct.unpack(">IIII", data_file.read(16))
+        _, label_number = struct.unpack(">II", label_file.read(8))
+
+        image_size = height * width
+        offset = 16
+
+        for i in range(0, min(data_number, label_number)):
+            _data = Link(
+                uri=str(data_fpath.absolute()),
+                offset=offset,
+                size=image_size,
+                data_type=GrayscaleImage(
+                    display_name=f"{i}", shape=(height, width, 1)
+                ),
+                with_local_fs_data=True,
+            )
+            _label = struct.unpack(">B", label_file.read(1))[0]
+            yield _data, {"label": _label}
+            offset += image_size
+
+```
+
+### 函数
+
+#### astype
+
+```python
+astype() -> Dict[str, t.Any]
+```
+
+## starwhale.S3LinkAuth
+
+当数据存储在基于S3协议的对象存储上时，该类型负责描述授权、密钥信息。Github上的[代码链接](https://github.com/star-whale/starwhale/blob/dc6e6fdeae2f7c5bd0e72ccd8fb50768b1ce0826/client/starwhale/core/dataset/type.py#L52)。
+
+```python
+S3LinkAuth(
+    name: str = "",
+    access_key: str = "",
+    secret: str = "",
+    endpoint: str = "",
+    region: str = "local",
+)
+```
+
+|参数|说明|
+|---|---|
+|`name`|Auth的名称|
+|`access_key`|S3连接中的access_key|
+|`secret`|S3连接中的secret|
+|`endpoint`|S3连接中的endpoint地址|
+|`region`|bucket所在的S3 region，默认为local|
+
+### 使用示例
+
+```python
+import struct
+import typing as t
+from pathlib import Path
+
+from starwhale import (
+    Link,
+    S3LinkAuth,
+    GrayscaleImage,
+    UserRawBuildExecutor,
+)
+class LinkRawDatasetProcessExecutor(UserRawBuildExecutor):
+    _auth = S3LinkAuth(name="mnist", access_key="minioadmin", secret="minioadmin")
+    _endpoint = "10.131.0.1:9000"
+    _bucket = "users"
+
+    def iter_item(self) -> t.Generator[t.Tuple[t.Any, t.Any], None, None]:
+        root_dir = Path(__file__).parent.parent / "data"
+
+        with (root_dir / "t10k-labels-idx1-ubyte").open("rb") as label_file:
+            _, label_number = struct.unpack(">II", label_file.read(8))
+
+            offset = 16
+            image_size = 28 * 28
+
+            uri = f"s3://{self._endpoint}@{self._bucket}/dataset/mnist/t10k-images-idx3-ubyte"
+            for i in range(label_number):
+                _data = Link(
+                    f"{uri}",
+                    self._auth,
+                    offset=offset,
+                    size=image_size,
+                    data_type=GrayscaleImage(display_name=f"{i}", shape=(28, 28, 1)),
+                )
+                _label = struct.unpack(">B", label_file.read(1))[0]
+                yield _data, {"label": _label}
+                offset += image_size
+```
+
+## starwhale.LocalFSLinkAuth
+
+描述数据存储在本地文件系统上，Github上的[代码链接](https://github.com/star-whale/starwhale/blob/dc6e6fdeae2f7c5bd0e72ccd8fb50768b1ce0826/client/starwhale/core/dataset/type.py#L151)。
+
+```python
+LocalFSLinkAuth = partial(LinkAuth, ltype=LinkType.LocalFS)
+```
+
+## starwhale.DefaultS3LinkAuth
+
+使用默认值初始化 `S3LinkAuth` 类型后得到的变量, Github上的[代码链接](https://github.com/star-whale/starwhale/blob/dc6e6fdeae2f7c5bd0e72ccd8fb50768b1ce0826/client/starwhale/core/dataset/type.py#L152)。
+
+```python
+DefaultS3LinkAuth = S3LinkAuth()
+```
+
+## starwhale.MIMEType
+
+描述Starwhale支持的多媒体类型，用Python Enum类型实现，用在 `Image`、`Video` 等类型的 `mime_type` 属性上，以便在Dataset Viewer中更好地展示。Github上的[代码链接](https://github.com/star-whale/starwhale/blob/dc6e6fdeae2f7c5bd0e72ccd8fb50768b1ce0826/client/starwhale/core/dataset/type.py#L106)。
+
+```python
+class MIMEType(Enum):
+    PNG = "image/png"
+    JPEG = "image/jpeg"
+    WEBP = "image/webp"
+    SVG = "image/svg+xml"
+    GIF = "image/gif"
+    APNG = "image/apng"
+    AVIF = "image/avif"
+    MP4 = "video/mp4"
+    AVI = "video/avi"
+    WAV = "audio/wav"
+    MP3 = "audio/mp3"
+    PLAIN = "text/plain"
+    CSV = "text/csv"
+    HTML = "text/html"
+    GRAYSCALE = "x/grayscale"
+    UNDEFINED = "x/undefined"
+```
+
+## starwhale.LinkType
+
+描述Starwhale支持的remote-link类型，用Python Enum类型实现，目前支持 `LocalFS` 和 `S3` 两种类型。Github上的[代码链接](https://github.com/star-whale/starwhale/blob/dc6e6fdeae2f7c5bd0e72ccd8fb50768b1ce0826/client/starwhale/core/dataset/type.py#L23)。
+
+```python
+class LinkType(Enum):
+    LocalFS = "local_fs"
+    S3 = "s3"
+    UNDEFINED = "undefined"
+```
diff --git a/docs/i18n/zh/docusaurus-plugin-content-docs/current/reference/sdk/dataset.md b/docs/i18n/zh/docusaurus-plugin-content-docs/current/reference/sdk/dataset.md
index e69de29bb2..ad733ad626 100644
--- a/docs/i18n/zh/docusaurus-plugin-content-docs/current/reference/sdk/dataset.md
+++ b/docs/i18n/zh/docusaurus-plugin-content-docs/current/reference/sdk/dataset.md
@@ -0,0 +1,165 @@
+---
+title: 数据集构建和加载
+---
+
+## starwhale.SWDSBinBuildExecutor
+
+提供swds格式的数据集构建类，需要用户实现 `iter_item` 函数，返回一个可迭代的对象，包含data和annotations。Github上的[代码链接](https://github.com/star-whale/starwhale/blob/dc6e6fdeae2f7c5bd0e72ccd8fb50768b1ce0826/client/starwhale/api/_impl/dataset/builder.py#L138)。
+
+```python
+class DatasetProcessExecutor(SWDSBinBuildExecutor):
+    def iter_item(self) -> t.Generator[t.Tuple[t.Any, t.Any], None, None]:
+        ...
+```
+
+`iter_item` 返回一个可迭代的对象，通常写法是for循环中，yield data和annotations。对于swds格式的数据集，data一般为 `Audio`，`Image`，`Text`、`GrayscaleImage`和`Binary`。也接受用户yield bytes类型的data，会自动转化成 `Binary` 类型。以[MNIST](https://github.com/star-whale/starwhale/tree/dc6e6fdeae2f7c5bd0e72ccd8fb50768b1ce0826/example/mnist)为例，构建swds的数据集基本代码如下：
+
+```python
+import struct
+import typing as t
+from pathlib import Path
+
+from starwhale import (
+    Link,
+    GrayscaleImage,
+    SWDSBinBuildExecutor,
+)
+
+class DatasetProcessExecutor(SWDSBinBuildExecutor):
+    def iter_item(self) -> t.Generator[t.Tuple[t.Any, t.Any], None, None]:
+        root_dir = Path(__file__).parent.parent / "data"
+
+        with (root_dir / "t10k-images-idx3-ubyte").open("rb") as data_file, (
+            root_dir / "t10k-labels-idx1-ubyte"
+        ).open("rb") as label_file:
+            _, data_number, height, width = struct.unpack(">IIII", data_file.read(16))
+            _, label_number = struct.unpack(">II", label_file.read(8))
+            print(
+                f">data({data_file.name}) split data:{data_number}, label:{label_number} group"
+            )
+            image_size = height * width
+
+            for i in range(0, min(data_number, label_number)):
+                _data = data_file.read(image_size)
+                _label = struct.unpack(">B", label_file.read(1))[0]
+                yield GrayscaleImage(
+                    _data,
+                    display_name=f"{i}",
+                    shape=(height, width, 1),
+                ), {"label": _label}
+```
+
+## starwhale.UserRawBuildExecutor
+
+提供remote-link和user-raw格式的数据集构建类，需要用户实现 `iter_item` 函数，返回一个可迭代的对象，包含data和annotations，其中data需要是一个 `starwhale.Link` 类型。Github上的[代码链接](https://github.com/star-whale/starwhale/blob/dc6e6fdeae2f7c5bd0e72ccd8fb50768b1ce0826/client/starwhale/api/_impl/dataset/builder.py#L307)。
+
+```python
+class RawDatasetProcessExecutor(UserRawBuildExecutor):
+    def iter_item(self) -> t.Generator[t.Tuple[t.Any, t.Any], None, None]:
+        ...
+```
+
+以[Speech Commands](https://github.com/star-whale/starwhale/tree/main/example/speech_command)为例，构建remote-link的数据集基本代码如下：
+
+```python
+import typing as t
+from pathlib import Path
+
+from starwhale import (
+    Link,
+    Audio,
+    MIMEType,
+    S3LinkAuth,
+    UserRawBuildExecutor,
+)
+class LinkRawDatasetBuildExecutor(UserRawBuildExecutor):
+
+    _auth = S3LinkAuth(
+        name="speech", access_key="minioadmin", secret="minioadmin", region="local"
+    )
+    _addr = "10.131.0.1:9000"
+    _bucket = "users"
+
+    def iter_item(self) -> t.Generator[t.Tuple[t.Any, t.Any], None, None]:
+        import boto3
+        from botocore.client import Config
+
+        s3 = boto3.resource(
+            "s3",
+            endpoint_url=f"http://{self._addr}",
+            aws_access_key_id=self._auth.access_key,
+            aws_secret_access_key=self._auth.secret,
+            config=Config(signature_version="s3v4"),
+            region_name=self._auth.region,
+        )
+
+        objects = s3.Bucket(self._bucket).objects.filter(
+            Prefix="dataset/SpeechCommands/speech_commands_v0.02"
+        )
+
+        for obj in objects:
+            path = Path(obj.key)  # type: ignore
+            command = path.parent.name
+            if (
+                command == "_background_noise_"
+                or "_nohash_" not in path.name
+                or obj.size < 10240
+                or not path.name.endswith(".wav")
+            ):
+                continue
+
+            speaker_id, utterance_num = path.stem.split("_nohash_")
+            uri = f"s3://{self._addr}@{self._bucket}/{obj.key.lstrip('/')}"
+            data = Link(
+                uri,
+                self._auth,
+                size=obj.size,
+                data_type=Audio(
+                    display_name=f"{command}/{path.name}",
+                    mime_type=MIMEType.WAV,
+                    shape=(1,),
+                ),
+            )
+            annotations = {
+                "label": command,
+                "speaker_id": speaker_id,
+                "utterance_num": int(utterance_num),
+            }
+            yield data, annotations
+```
+
+## starwhale.BuildExecutor
+
+`SWDSBinBuildExecutor` 类的别称，同为swds格式的数据集构建类。
+
+## starwhale.get_data_loader
+
+获取Starwhale Dataset的Data Loader，是一个可迭代的对象，能够获取数据集中具体样本的索引、data和annotations。Github上的[代码链接](https://github.com/star-whale/starwhale/blob/dc6e6fdeae2f7c5bd0e72ccd8fb50768b1ce0826/client/starwhale/api/_impl/dataset/loader.py)。该函数返回的loader有两种:一种是表示swds格式的 `SWDSBinDataLoader`, 另一种是表示remote-link或user-raw格式的 `UserRawDataLoader`。两种loader类型目前都能处理在LocalFS和S3协议的对象存储上数据。
+
+```python
+def get_data_loader(
+    dataset_uri: URI,
+    start: int = 0,
+    end: int = sys.maxsize,
+    logger: t.Union[loguru.Logger, None] = None,
+) -> DataLoader:
+```
+
+|参数|说明|
+|---|---|
+|`dataset_uri`| `starwhale.URI` 对象 |
+|`start`| 数据集index的起始位，默认从0开始 |
+|`end`| 数据集index的结束位。start和end表示是左闭右开的区间，即 `start <= i < end` |
+|`logger`|可传入自定义的logger对象|
+
+使用示例如下：
+
+```python
+from starwhale import get_data_loader, URI
+
+uri = URI("mnist/version/latest", expected_type="dataset")
+data_loader = get_data_loader(dataset_uri=uri)
+
+for idx, data, annotations in data_loader:
+    ...
+```
diff --git a/docs/i18n/zh/docusaurus-plugin-content-docs/current/reference/sdk/evaluation.md b/docs/i18n/zh/docusaurus-plugin-content-docs/current/reference/sdk/evaluation.md
index e69de29bb2..e2a87baf2f 100644
--- a/docs/i18n/zh/docusaurus-plugin-content-docs/current/reference/sdk/evaluation.md
+++ b/docs/i18n/zh/docusaurus-plugin-content-docs/current/reference/sdk/evaluation.md
@@ -0,0 +1,195 @@
+---
+title: 模型评测
+---
+
+## starwhale.PipelineHandler
+
+提供默认的模型评测过程定义，需要用户实现 `ppl` 和 `cmp` 函数。Github上的[代码链接](https://github.com/star-whale/starwhale/blob/dc6e6fdeae2f7c5bd0e72ccd8fb50768b1ce0826/client/starwhale/api/_impl/model.py)。
+
+```python
+from abc import ABCMeta, abstractmethod
+
+class PipelineHandler(metaclass=ABCMeta):
+    def __init__(self,
+        ignore_annotations: bool = False,
+        ignore_error: bool = False,
+    ) -> None:
+        ...
+
+    @abstractmethod
+    def ppl(self, data: Any, **kw: Any) -> Any:
+        raise NotImplementedError
+
+    @abstractmethod
+    def cmp(self, ppl_result: PPLResultIterator) -> Any:
+        raise NotImplementedError
+```
+
+`PipelineHandler` 类实例化时可以定义两个参数：当`ignore_annotations`为False时，PPLResultIterator中会携带数据集所对应的 annotations信息，保证index上与推理结果是一一对应的；当 `ignore_error`为True时，会忽略ppl过程中的错误，可以解决比较大的数据集样本中，有个别数据错误导致ppl失败，进而导致无法完成评测的问题。
+
+`ppl` 函数用来进行推理，输入参数为 data和kw。data表示数据集中某个样本，kw为一个字典，目前包含 `annotations` 和 `index`。每条数据集样本都会调用`ppl`函数，输出为模型推理值，会自动被记录和存储，可以在cmp函数中通过 `ppl_result` 参数获取。
+
+`cmp` 函数一般用来进行推理结果的汇总，并产生最终的评测报告数据，只会调用一次。`cmp` 函数的参数为 `ppl_result` ，该值是 `PPLResultIterator` 类型，可以被迭代。迭代出来的对象（下文记为 `_data`）为一个字典，包含 `result`, `annotations` 和 `data_id` 三个元素。`result` 为 `ppl` 返回的元素，由于使用了 pickle做序列化-反序列化，通过 `_data["result"]` 能直接获取ppl函数return的值；`annotations` 为构建数据集时写入的，此阶段的 `_data["annotations"]` 为一个dict类型。`data_id` 表示数据集对应的index。
+
+另外，在PipelineHandler及其子类中可以访问 `self.context` 获取 `starwhale.Context` 类型的上下文信息。
+
+常见的使用方法示例如下：
+
+```python
+
+class Example(PipelineHandler):
+    def __init__(self) -> None:
+        super().__init__()
+        self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+        self.model = self._load_model(self.device)
+
+    def ppl(self, img: Image, **kw):
+        data_tensor = self._pre(img)
+        output = self.model(data_tensor)
+        return self._post(output)
+
+    def cmp(self, ppl_result):
+        result, label, pr = [], [], []
+        for _data in ppl_result:
+            label.append(_data["annotations"]["label"])
+            result.extend(_data["result"][0])
+            pr.extend(_data["result"][1])
+        return label, result, pr
+
+    def _pre(self, input: Image) -> torch.Tensor:
+        ...
+
+    def _post(self, input):
+        ...
+
+    def _load_model(self, device):
+        ...
+```
+
+## starwhale.Context
+
+执行模型评测过程中传入的上下文信息，包括Project、Task ID等。Github上的[代码链接](https://github.com/star-whale/starwhale/blob/dc6e6fdeae2f7c5bd0e72ccd8fb50768b1ce0826/client/starwhale/api/_impl/job.py)。Context的内容是自动注入的，用户通过 `@pass_context` 使用context，或在 继承 `PipelineHandler` 类内使用，目前Context可以获得如下值：
+
+```python
+
+@pass_context
+def func(ctx: Context):
+    ...
+    print(ctx.project)
+    print(ctx.version)
+    print(ctx.step)
+    ...
+
+Context(
+    workdir: Path,
+    step: str = "",
+    total: int = 1,
+    index: int = 0,
+    dataset_uris: t.List[str] = [],
+    version: str = "",
+    project: str = "",
+)
+```
+
+|参数|说明|
+|---|----|
+|project|project名字|
+|version|Evaluation 版本号|
+|step|step名字|
+|total|step下所有的task数量|
+|index|当前task的索引编号，从零开始|
+|dataset_uris|dataset uri字符串的列表|
+|workdir|model.yaml所在目录|
+
+## starwhale.PPLResultIterator
+
+`cmp`函数中使用，是一个可迭代的对象，能够输出 `ppl` 结果，数据集index和对应的数据集annotations。Github上的[代码链接](https://github.com/star-whale/starwhale/blob/dc6e6fdeae2f7c5bd0e72ccd8fb50768b1ce0826/client/starwhale/api/_impl/model.py)。
+
+```python
+from starwhale import PipelineHandler, PPLResultIterator
+
+class Example(PipelineHandler):
+    def cmp(
+        self, ppl_result: PPLResultIterator
+    ) -> t.Tuple[t.List[int], t.List[int], t.List[t.List[float]]]:
+        result, label, pr = [], [], []
+        for _data in ppl_result:
+            label.append(_data["annotations"]["label"])
+            result.extend(_data["result"][0])
+            pr.extend(_data["result"][1])
+            print(_data["data_id"])
+        return label, result, pr
+
+```
+
+## starwhale.multi_classification
+
+修饰器，适用于多分类问题，用来简化cmp结果的进一步计算和结果存储，能更好的呈现评测结果。Github上的[代码链接](https://github.com/star-whale/starwhale/blob/dc6e6fdeae2f7c5bd0e72ccd8fb50768b1ce0826/client/starwhale/api/_impl/metric.py)。
+
+```python
+
+@multi_classification(
+    confusion_matrix_normalize="all",
+    show_hamming_loss=True,
+    show_cohen_kappa_score=True,
+    show_roc_auc=True,
+    all_labels=[i for i in range(0, 10)],
+)
+def cmp(ppl_result: PPLResultIterator) -> t.Tuple[t.List[int], t.List[int], t.List[t.List[float]]]:
+    label, result, probability_matrix = [], [], []
+    return label, result, probability_matrix
+
+@multi_classification(
+    confusion_matrix_normalize="all",
+    show_hamming_loss=True,
+    show_cohen_kappa_score=True,
+    show_roc_auc=False,
+    all_labels=[i for i in range(0, 10)],
+)
+def cmp(ppl_result: PPLResultIterator) -> t.Tuple[t.List[int], t.List[int]]:
+    label, result = [], []
+    return label, result
+```
+
+|参数|说明|
+|---|----|
+|`confusion_matrix_normalize`| `true`(rows), `pred`(columns) 或 `all`(rows+columns) |
+|`show_hamming_loss`|是否计算hamming loss|
+|`show_cohen_kappa_score`|是否计算 cohen kappa score|
+|`show_roc_auc`|是否计算roc/auc, 计算的时候，需要函数返回(label，result, probability_matrix) 三元组，否则只需返回(label, result) 两元组即可|
+|all_labels|所有的labels|
+
+`multi_classification` 修饰器使用sklearn lib对多分类问题进行结果分析，输出confusion matrix, roc, auc等值，并且会写入到 starwhale的 DataStore 中。使用的时候需要对所修饰的函数返回值有一定要求，返回(label, result, probability_matrix) 或 (label, result)。
+
+## starwhale.step
+
+修饰器，可以指定DAG的依赖关系和Task数量、资源等配置，实现用户自定义评测过程。Github上的[代码链接](https://github.com/star-whale/starwhale/blob/dc6e6fdeae2f7c5bd0e72ccd8fb50768b1ce0826/client/starwhale/api/_impl/job.py)。使用 `step` 可以完全不依赖于 `PipelineHandler` 预定义的基本模型评测过程，可以自行定义多阶段和每个阶段的依赖、资源和任务并发数等。
+
+```python
+@step(
+    resources: Optional[List[str]] = None,
+    concurrency: int = 1,
+    task_num: int = 1,
+    needs: Optional[List[str]] = None,
+)
+def func():
+    ...
+
+```
+
+|参数|说明|
+|---|----|
+|`resources`|该step中每个task所依赖的资源情况|
+|`concurrency`|task执行的并发度|
+|`task_num`|step会被分成task的数量|
+|`needs`|依赖的step列表|
+
+`resources` 格式为 `{名称}={数量}`。名称为资源的种类，目前支持 `cpu`、`gpu` 和 `memory`。当种类为 `cpu` 时，数量的类型为float, 没有单位，1表示1个cpu core，对应Kubernetes resource的request；当种类为 `gpu` 时，数量的类型为int，没有单位，1表示1个gpu，对应Kubernetes resource的request和limit；当种类为 `memory`时，数量的类型为float，没有单位，1表示1MB内存，对应Kubernetes resource的request。`resources` 使用列表的方式支持指定多个资源，且这些资源都满足时才会进行调度。当不写 `resources` 时，会使用所在Kubernetes的cpu、memory默认值。 `resources` 表示的是一个task执行时所需要的资源情况，并不是step所有task的资源总和限制。**目前 `resources` 只在Cloud Instance中生效**。 `resources` 使用例子如下：
+
+```python
+@step()
+@step(resources=["cpu=1"])
+@step(resources=["gpu=1"])
+@step(resources=["memory=100"])
+@step(resources=["cpu=0.1", "gpu=1", "memory=100"])
+```
diff --git a/docs/i18n/zh/docusaurus-plugin-content-docs/current/reference/sdk/other.md b/docs/i18n/zh/docusaurus-plugin-content-docs/current/reference/sdk/other.md
new file mode 100644
index 0000000000..b1ff31f7ee
--- /dev/null
+++ b/docs/i18n/zh/docusaurus-plugin-content-docs/current/reference/sdk/other.md
@@ -0,0 +1,53 @@
+---
+title: 其他SDK
+---
+
+## starwhale.\_\_version\_\_
+
+Starwhale SDK和CLI的版本，是字符串常量。
+
+```python
+>>> from starwhale import __version__
+>>> print(__version__)
+0.3.0rc10
+```
+
+## starwhale.URI
+
+starwhale uri的类定义，可以将字符串转化成URI对象。Github上的[代码链接](https://github.com/star-whale/starwhale/blob/dc6e6fdeae2f7c5bd0e72ccd8fb50768b1ce0826/client/starwhale/base/uri.py)。
+
+```python
+URI(
+    raw: str,
+    expected_type: str = URIType.UNKNOWN
+)
+```
+
+|参数|说明|
+|---|---|
+|`raw`| starwhale uri的字符串 |
+|`expected_type`| 可以对有歧义的uri字符串强制指定为某种类型 |
+
+```python
+>>> dataset_uri = URI("mnist/version/latest", expected_type=URIType.DATASET)
+>>> model_uri = URI("mnist/version/latest", expected_type=URIType.MODEL)
+>>> runtime_uri = URI("mnist/version/latest", expected_type=URIType.RUNTIME)
+>>> dataset_uri = URI("dataset/mnist/version/latest")
+```
+
+上面前三个例子中，uri的原始字符串都是 `mnist/version/latest`，这是一个有歧义的URI，但当指定了 `expected_type` 参数后，可以明确指定为预期的URI。最后一个例子则直接在uri字符串中写明了 `dataset` 类型，没有传入 `expected_type` 参数。
+
+## starwhale.URIType
+
+描述 `starwhale.URI` 类型，Github上的[代码链接](https://github.com/star-whale/starwhale/blob/dc6e6fdeae2f7c5bd0e72ccd8fb50768b1ce0826/client/starwhale/base/type.py)。
+
+```python
+class URIType:
+    INSTANCE = "instance"
+    PROJECT = "project"
+    MODEL = "model"
+    DATASET = "dataset"
+    RUNTIME = "runtime"
+    EVALUATION = "evaluation"
+    UNKNOWN = "unknown"
+```
diff --git a/docs/i18n/zh/docusaurus-plugin-content-docs/current/reference/sdk/overview.md b/docs/i18n/zh/docusaurus-plugin-content-docs/current/reference/sdk/overview.md
new file mode 100644
index 0000000000..c8154b72f3
--- /dev/null
+++ b/docs/i18n/zh/docusaurus-plugin-content-docs/current/reference/sdk/overview.md
@@ -0,0 +1,43 @@
+---
+title: 基本信息
+---
+
+Starwhale 提供一系列的Python SDK，帮助用户更容易地制作数据集、调用模型评测、追踪和展示评测结果等。Python SDK多数场景下与YAML和CLI配合使用，完成模型评测等核心任务。
+
+## 类
+
+- `class PipelineHandler`: 提供默认的模型评测过程定义，需要用户实现 `ppl` 和 `cmp` 函数。
+- `class Context`: 执行模型评测过程中传入的上下文信息，包括Project、Task ID等。
+- `class SWDSBinBuildExecutor`: 提供swds格式的数据集构建类，需要用户实现 `iter_item` 函数。
+- `class UserRawBuildExecutor`: 提供remote-link和user-raw格式的数据集构建类，需要用户实现 `iter_item` 函数。
+- `class BuildExecutor`: `SWDSBinBuildExecutor` 类的别称，同为swds格式的数据集构建类。
+- `class PPLResultIterator`: 在 `cmp` 函数中使用，是一个可迭代的对象，能够输出 `ppl` 结果、数据集index和对应的数据集annotations。
+- `class URI`: starwhale uri的类定义，可以将字符串转化成URI对象。
+
+## 函数
+
+- `multi_classification`: 修饰器，适用于多分类问题，用来简化cmp结果的进一步计算和结果存储，能更好的呈现评测结果。
+- `step`: 修饰器，可以指定DAG的依赖关系和Task数量、资源等配置，实现用户自定义评测过程。
+- `get_data_loader`: 获取Starwhale Dataset的Data Loader，是一个可迭代的对象，能够获取数据集中具体样本的索引、data和annotations。
+
+## 数据类型
+
+- `COCOObjectAnnotation`: 提供COCO类型的定义。
+- `GrayscaleImage`: 灰度图类型，比如MNIST中数字手写体图片，是 `Image` 类型的一个特例。
+- `BoundingBox`: 边界框类型，目前为 `LTWH` 格式，即 `left_x`, `top_y`, `width` 和 `height`。
+- `ClassLabel`: 描述label的数量和类型。
+- `Image`: 图片类型。
+- `Audio`: 音频类型。
+- `Text`: 文本类型，默认为 `utf-8` 格式。
+- `Binary`: 二进制类型，用bytes存储。
+- `Link`: Link类型，用来制作 `remote-link` 和 `user-raw` 类型的数据集。
+- `S3LinkAuth`: 当数据存储在基于S3协议的对象存储上时，该类型负责描述授权、密钥信息。
+- `LocalFSLinkAuth`: 描述数据存储在本地文件系统上。
+- `DefaultS3LinkAuth`: 使用默认值初始化 `S3LinkAuth` 类型后得到的变量。
+- `MIMEType`: 描述Starwhale支持的多媒体类型，用在 `Image`、`Audio` 等类型的 `mime_type` 属性上，能更好地在Dataset Viewer中展示。
+- `LinkType`: 描述Starwhale支持的remote-link类型，目前支持 `LocalFS` 和 `S3` 两种类型。
+
+## 其他
+
+- `__version__`: Starwhale SDK和CLI的版本，是字符串常量。
+- `URIType`: 描述 `starwhale.URI` 类型。
diff --git a/docs/sidebars.js b/docs/sidebars.js
index 24c061efde..20e9ae0ee8 100644
--- a/docs/sidebars.js
+++ b/docs/sidebars.js
@@ -56,8 +56,11 @@ module.exports = {
                     "reference/cli/eval",
                     "reference/cli/utilities"],
                 "Python SDK": [
+                    "reference/sdk/overview",
+                    "reference/sdk/data_type",
                     "reference/sdk/dataset",
                     "reference/sdk/evaluation",
+                    "reference/sdk/other",
                 ],
             },
             "Community": [