Skip to content

Commit

Permalink
Update support batch delete storage design document (apache#4234)
Browse files Browse the repository at this point in the history
* Update delete index design document
  • Loading branch information
ZhangYu0123 authored Aug 18, 2020
1 parent 3359467 commit 8a3eaee
Show file tree
Hide file tree
Showing 2 changed files with 60 additions and 56 deletions.
78 changes: 40 additions & 38 deletions docs/en/internal/doris_storage_optimization.md
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@ Documents include:
- The file starts with an 8-byte magic code to identify the file format and version
- Data Region: Used to store data information for each column, where the data is loaded on demand by pages.
- Index Region: Doris stores the index data of each column in Index Region, where the data is loaded according to column granularity, so the data information of the following column is stored separately.
- Footer信息
- Footer
- FileFooterPB: Metadata Information for Definition Files
- A 4-byte checksum of the FileFooterPB content
- Four bytes FileFooterPB message length for reading FileFooterPB
Expand Down Expand Up @@ -116,27 +116,29 @@ We generate a sparse index of short key every N rows (configurable) with the con
The format design supports the subsequent expansion of other index information, such as bitmap index, spatial index, etc. It only needs to write the required data to the existing column data, and add the corresponding metadata fields to FileFooterPB.

### Metadata Definition ###
FileFooterPB is defined as:
SegmentFooterPB is defined as:

```
message ColumnPB {
optional uint32 column_id = 1; // 这里使用column id,不使用column name是因为计划支持修改列名
optional string type = 2; // 列类型
optional string aggregation = 3; // 是否聚合
optional uint32 length = 4; // 长度
optional bool is_key = 5; // 是否是主键列
optional string default_value = 6; // 默认值
optional uint32 precision = 9 [default = 27]; // 精度
optional uint32 frac = 10 [default = 9];
optional bool is_nullable = 11 [default=false]; // 是否有null
optional bool is_bf_column = 15 [default=false]; // 是否有bf词典
optional bool is_bitmap_column = 16 [default=false]; // 是否有bitmap索引
required int32 unique_id = 1; // The column id is used here instead of the column name, because renaming columns is planned to be supported
optional string name = 2; // Column name, when name equals __DORIS_DELETE_SIGN__, this column is a hidden delete column
required string type = 3; // Column type
optional bool is_key = 4; // Whether column is a primary key column
optional string aggregation = 5; // Aggregate type
optional bool is_nullable = 6; // Whether the column is allowed to be assigned null
optional bytes default_value = 7; // Default value
optional int32 precision = 8; // Precision of column
optional int32 frac = 9;
optional int32 length = 10; // Length of column
optional int32 index_length = 11; // Length of column index
optional bool is_bf_column = 12; // Whether column has bloom filter index
optional bool has_bitmap_index = 15 [default=false]; // Whether column has bitmap index
}
// page偏移
// page offset
message PagePointerPB {
required uint64 offset; // page在文件中的偏移
required uint32 length; // page的大小
required uint64 offset; // offset of page in segment file
required uint32 length; // length of page
}
message MetadataPairPB {
Expand All @@ -145,36 +147,36 @@ message MetadataPairPB {
}
message ColumnMetaPB {
optional ColumnMessage encoding; // 编码方式
optional ColumnMessage encoding; // Encoding of column
optional PagePointerPB dict_page // 词典page
repeated PagePointerPB bloom_filter_pages; // bloom filter词典信息
optional PagePointerPB ordinal_index_page; // 行号索引数据
optional PagePointerPB page_zone_map_page; // page级别统计信息索引数据
optional PagePointerPB dict_page // Dictionary page
repeated PagePointerPB bloom_filter_pages; // Bloom filter pages
optional PagePointerPB ordinal_index_page; // Ordinal index page
optional PagePointerPB page_zone_map_page; // Page-level statistics index data
optional PagePointerPB bitmap_index_page; // bitmap索引数据
optional PagePointerPB bitmap_index_page; // Bitmap index page
optional uint64 data_footprint; // 列中索引的大小
optional uint64 index_footprint; // 列中数据的大小
optional uint64 raw_data_footprint; // 原始列数据大小
optional uint64 data_footprint; // The size of the data in the column
optional uint64 index_footprint; // The size of the index in the column
optional uint64 raw_data_footprint; // Original column data size
optional CompressKind compress_kind; // 列的压缩方式
optional CompressKind compress_kind; // Column compression type
optional ZoneMapPB column_zone_map; //文件级别的过滤条件
optional ZoneMapPB column_zone_map; // Segment-level statistics index data
repeated MetadataPairPB column_meta_datas;
}
message FileFooterPB {
optional uint32 version = 2 [default = 1]; // 用于版本兼容和升级使用
repeated ColumnPB schema = 5; // 列Schema
optional uint64 num_values = 4; // 文件中保存的行数
optional uint64 index_footprint = 7; // 索引大小
optional uint64 data_footprint = 8; // 数据大小
optional uint64 raw_data_footprint = 8; // 原始数据大小
optional CompressKind compress_kind = 9 [default = COMPRESS_LZO]; // 压缩方式
repeated ColumnMetaPB column_metas = 10; // 列元数据
optional PagePointerPB key_index_page; // short key索引page
message SegmentFooterPB {
optional uint32 version = 2 [default = 1]; // For version compatibility and upgrade use
repeated ColumnPB schema = 5; // Schema of columns
optional uint64 num_values = 4; // Number of rows saved in the file
optional uint64 index_footprint = 7; // Index size
optional uint64 data_footprint = 8; // Data size
optional uint64 raw_data_footprint = 8; // Original data size
optional CompressKind compress_kind = 9 [default = COMPRESS_LZO]; // Compression type
repeated ColumnMetaPB column_metas = 10; // Column metadata
optional PagePointerPB key_index_page = 11; // short key index page
}
```
Expand Down
38 changes: 20 additions & 18 deletions docs/zh-CN/internal/doris_storage_optimization.md
Original file line number Diff line number Diff line change
Expand Up @@ -115,21 +115,23 @@ non-nullable data page结构如下:
该格式设计支持后续扩展其他的索引信息,比如bitmap索引,spatial索引等等,只需要将需要的数据写到现有的列数据后面,并且添加对应的元数据字段到FileFooterPB中

### 元数据定义 ###
FileFooterPB的定义为
SegmentFooterPB的定义为

```
message ColumnPB {
optional uint32 column_id = 1; // 这里使用column id,不使用column name是因为计划支持修改列名
optional string type = 2; // 列类型
optional string aggregation = 3; // 是否聚合
optional uint32 length = 4; // 长度
optional bool is_key = 5; // 是否是主键列
optional string default_value = 6; // 默认值
optional uint32 precision = 9 [default = 27]; // 精度
optional uint32 frac = 10 [default = 9];
optional bool is_nullable = 11 [default=false]; // 是否有null
optional bool is_bf_column = 15 [default=false]; // 是否有bf词典
optional bool is_bitmap_column = 16 [default=false]; // 是否有bitmap索引
required int32 unique_id = 1; // 这里使用column id, 不使用column name是因为计划支持修改列名
optional string name = 2; // 列的名字, 当name为__DORIS_DELETE_SIGN__, 表示该列为隐藏的删除列
required string type = 3; // 列类型
optional bool is_key = 4; // 是否是主键列
optional string aggregation = 5; // 聚合方式
optional bool is_nullable = 6; // 是否有null
optional bytes default_value = 7; // 默认值
optional int32 precision = 8; // 精度
optional int32 frac = 9;
optional int32 length = 10; // 长度
optional int32 index_length = 11; // 索引长度
optional bool is_bf_column = 12; // 是否有bf词典
optional bool has_bitmap_index = 15 [default=false]; // 是否有bitmap索引
}
// page偏移
Expand Down Expand Up @@ -163,16 +165,16 @@ message ColumnMetaPB {
repeated MetadataPairPB column_meta_datas;
}
message FileFooterPB {
message SegmentFooterPB {
optional uint32 version = 2 [default = 1]; // 用于版本兼容和升级使用
repeated ColumnPB schema = 5; // 列Schema
optional uint64 num_values = 4; // 文件中保存的行数
optional uint64 index_footprint = 7; // 索引大小
optional uint64 data_footprint = 8; // 数据大小
optional uint64 num_values = 4; // 文件中保存的行数
optional uint64 index_footprint = 7; // 索引大小
optional uint64 data_footprint = 8; // 数据大小
optional uint64 raw_data_footprint = 8; // 原始数据大小
optional CompressKind compress_kind = 9 [default = COMPRESS_LZO]; // 压缩方式
repeated ColumnMetaPB column_metas = 10; // 列元数据
optional CompressKind compress_kind = 9 [default = COMPRESS_LZO]; // 压缩方式
repeated ColumnMetaPB column_metas = 10; // 列元数据
optional PagePointerPB key_index_page; // short key索引page
}
Expand Down

0 comments on commit 8a3eaee

Please sign in to comment.